From 2cddc72f7aa1eab43c69250e608d662909383ba7 Mon Sep 17 00:00:00 2001 From: Keith Davis Date: Thu, 7 Apr 2022 11:32:00 +0100 Subject: IVGCVSW-6124 ConstTensorsAsInput: Conv2d - FrontEnd * Update Front-end and Tools. * Updated Serializer, Deserializer and unit tests to reflect this. * Updated TfLiteDelegate, TfLiteParser and OnnxParser. * Updated Ref. * Fixed resulting Neon / CL tests * Unified optimizers for conv2d ops * Optimizer Fix - Fp32ToBf16 * Partial implementation for ACL backends to fix VTS failures !android-nn-driver:7477 Signed-off-by: Keith Davis Change-Id: I5fb18877f7ee32643e15a9818945356274bb401b --- delegate/src/Convolution.hpp | 32 +- include/armnn/Descriptors.hpp | 2 + include/armnn/ILayerVisitor.hpp | 10 + include/armnn/INetwork.hpp | 15 +- include/armnn/LayerVisitorBase.hpp | 4 + src/armnn/BackendHelper.cpp | 25 + src/armnn/Descriptors.cpp | 41 +- src/armnn/Graph.cpp | 14 +- src/armnn/Layer.cpp | 37 +- src/armnn/LoadedNetwork.cpp | 7 +- src/armnn/Network.cpp | 83 ++-- src/armnn/Network.hpp | 8 +- src/armnn/NetworkUtils.cpp | 9 + src/armnn/Tensor.cpp | 4 +- src/armnn/layers/Convolution2dLayer.cpp | 51 +-- src/armnn/layers/Convolution2dLayer.hpp | 6 +- src/armnn/optimizations/FoldPadIntoLayer2d.hpp | 58 ++- src/armnn/optimizations/FuseBatchNorm.hpp | 68 +-- .../RedirectMembersToConstantInputs.hpp | 1 + src/armnn/test/ConstTensorLayerVisitor.cpp | 48 +- src/armnn/test/ConstTensorLayerVisitor.hpp | 20 +- src/armnn/test/NetworkTests.cpp | 21 +- src/armnn/test/OptimizerTests.cpp | 62 ++- src/armnn/test/ShapeInferenceTests.cpp | 15 +- src/armnn/test/SubgraphViewTests.cpp | 72 ++- src/armnn/test/optimizations/FoldPadTests.cpp | 27 +- .../test/optimizations/FuseActivationTests.cpp | 5 +- .../test/optimizations/FuseBatchNormTests.cpp | 106 ++--- src/armnnDeserializer/Deserializer.cpp | 73 ++- .../test/DeserializeConvolution2d.cpp | 182 ++++++++ src/armnnOnnxParser/OnnxParser.cpp | 7 +- src/armnnSerializer/Serializer.cpp | 16 +- src/armnnSerializer/Serializer.hpp | 1 - src/armnnSerializer/test/SerializerTestUtils.cpp | 14 +- src/armnnSerializer/test/SerializerTests.cpp | 152 ++++++- src/armnnTestUtils/CommonTestUtils.cpp | 33 +- src/armnnTestUtils/CommonTestUtils.hpp | 5 +- src/armnnTestUtils/CreateWorkload.hpp | 124 +++-- src/armnnTestUtils/MockBackend.cpp | 1 + src/armnnTfLiteParser/TfLiteParser.cpp | 66 +-- src/backends/aclCommon/ArmComputeSubgraphUtils.hpp | 29 +- src/backends/backendsCommon/WorkloadData.cpp | 15 +- src/backends/backendsCommon/WorkloadFactory.cpp | 12 +- .../backendsCommon/test/DynamicBackendTests.hpp | 2 +- .../test/FullyConnectedEndToEndTestImpl.hpp | 39 +- .../test/LayerReleaseConstantDataTest.cpp | 99 ++-- .../backendsCommon/test/OptimizationViewsTests.cpp | 45 +- .../test/OptimizeSubgraphViewTests.cpp | 505 ++++++++++++++++----- .../backendsCommon/test/OptimizedNetworkTests.cpp | 3 + .../backendsCommon/test/WorkloadDataValidation.cpp | 93 +++- .../test/layerTests/Conv2dTestImpl.cpp | 114 ++++- src/backends/cl/ClBackend.cpp | 4 +- src/backends/cl/test/ClCreateWorkloadTests.cpp | 4 + src/backends/cl/test/ClImportTensorHandleTests.cpp | 8 +- .../cl/workloads/ClConvolution2dWorkload.cpp | 19 +- src/backends/neon/NeonBackend.cpp | 4 +- .../neon/workloads/NeonConvolution2dWorkload.cpp | 16 +- src/backends/reference/RefLayerSupport.cpp | 5 +- .../workloads/RefConvolution2dWorkload.cpp | 50 +- .../workloads/RefConvolution2dWorkload.hpp | 9 +- .../RefDepthwiseConvolution2dWorkload.hpp | 7 - src/profiling/test/ProfilingTestUtils.cpp | 301 +++++++++++- 62 files changed, 2101 insertions(+), 807 deletions(-) diff --git a/delegate/src/Convolution.hpp b/delegate/src/Convolution.hpp index 3b23d6d500..679f4dbe39 100644 --- a/delegate/src/Convolution.hpp +++ b/delegate/src/Convolution.hpp @@ -160,29 +160,29 @@ TfLiteStatus VisitConv2dOperator(DelegateData& delegateData, return isSupported ? kTfLiteOk : kTfLiteError; } - armnn::IConnectableLayer* layer = nullptr; - // Set up filter and biases + armnn::IConnectableLayer* layer = delegateData.m_Network->AddConvolution2dLayer(descriptor); + auto filter = CreateConstTensor(&tfLiteContext->tensors[tfLiteNode->inputs->data[1]], filterTensorInfo, armnn::Optional()); - if(biasEnabled) - { - auto biases = - CreateConstTensor(&tfLiteContext->tensors[tfLiteNode->inputs->data[2]], - biasTensorInfo, - armnn::Optional()); - layer = delegateData.m_Network->AddConvolution2dLayer(descriptor, - filter, - armnn::Optional(biases)); - } - else + armnn::IConnectableLayer* weightsLayer = delegateData.m_Network->AddConstantLayer(filter); + weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1u)); + weightsLayer->GetOutputSlot(0).SetTensorInfo(filterTensorInfo); + + if (biasEnabled) { - layer = delegateData.m_Network->AddConvolution2dLayer(descriptor, - filter, - armnn::EmptyOptional()); + const TfLiteTensor& tfLiteBiasTensor = tfLiteTensors[tfLiteNode->inputs->data[2]]; + if(tflite::IsConstantTensor(&tfLiteBiasTensor)) + { + auto biasTensor = CreateConstTensor(&tfLiteBiasTensor, biasTensorInfo); + armnn::IConnectableLayer* biasLayer = delegateData.m_Network->AddConstantLayer(biasTensor); + ARMNN_ASSERT(biasLayer != nullptr); + biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2u)); + biasLayer->GetOutputSlot(0).SetTensorInfo(biasTensorInfo); + } } ARMNN_ASSERT(layer != nullptr); diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp index 4aa23a4ee0..7f46c6a79d 100644 --- a/include/armnn/Descriptors.hpp +++ b/include/armnn/Descriptors.hpp @@ -524,6 +524,8 @@ struct Convolution2dDescriptor : BaseDescriptor m_BiasEnabled == rhs.m_BiasEnabled && m_DataLayout == rhs.m_DataLayout; } + uint32_t GetNumInputs() const; + /// Padding left value in the width dimension. uint32_t m_PadLeft; diff --git a/include/armnn/ILayerVisitor.hpp b/include/armnn/ILayerVisitor.hpp index d5e3d11fbd..a0c782e66e 100644 --- a/include/armnn/ILayerVisitor.hpp +++ b/include/armnn/ILayerVisitor.hpp @@ -98,6 +98,15 @@ public: const ConstTensor& input, const char* name = nullptr) = 0; + /// Function that a 2D convolution layer should call back to when its Accept(ILayerVisitor&) + /// function is invoked. + /// @param layer - pointer to the layer which is calling back to this visit function. + /// @param convolution2dDescriptor - Description of the 2D convolution layer. + /// @param name - Optional name for the layer. + virtual void VisitConvolution2dLayer(const IConnectableLayer* layer, + const Convolution2dDescriptor& convolution2dDescriptor, + const char* name = nullptr) = 0; + /// Function that a 2D convolution layer should call back to when its Accept(ILayerVisitor&) /// function is invoked. /// @param layer - pointer to the layer which is calling back to this visit function. @@ -105,6 +114,7 @@ public: /// @param weights - Tensor for the weights data. /// @param biases - Optional tensor for the bias data. If specified, must match the output tensor shape. /// @param name - Optional name for the layer. + ARMNN_DEPRECATED_MSG("Use VisitConvolution2dLayer without ConstTensors") virtual void VisitConvolution2dLayer(const IConnectableLayer* layer, const Convolution2dDescriptor& convolution2dDescriptor, const ConstTensor& weights, diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp index 1d6276bfe7..89b4776d39 100644 --- a/include/armnn/INetwork.hpp +++ b/include/armnn/INetwork.hpp @@ -42,7 +42,7 @@ class IOutputSlot public: virtual unsigned int GetNumConnections() const = 0; virtual const IInputSlot* GetConnection(unsigned int index) const = 0; - virtual IInputSlot* GetConnection(unsigned int index) = 0; + virtual IInputSlot* GetConnection(unsigned int outputindex) = 0; virtual void SetTensorInfo(const TensorInfo& tensorInfo) = 0; virtual const TensorInfo& GetTensorInfo() const = 0; @@ -291,12 +291,20 @@ public: IConnectableLayer* AddConcatLayer(const ConcatDescriptor& concatDescriptor, const char* name = nullptr); + /// Adds a 2D convolution layer to the network. + /// @param convolution2dDescriptor - Description of the 2D convolution layer. + /// @param name - Optional name for the layer. + /// @return - Interface for configuring the layer. + IConnectableLayer* AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, + const char* name = nullptr); + /// Adds a 2D convolution layer to the network. /// @param convolution2dDescriptor - Description of the 2D convolution layer. /// @param weights - Tensor for the weights data. /// @param biases - Optional tensor for the bias data. If specified, must match the output tensor shape. /// @param name - Optional name for the layer. /// @return - Interface for configuring the layer. + ARMNN_DEPRECATED_MSG_REMOVAL_DATE("This AddConvolution2dLayer overload is deprecated", "22.08") IConnectableLayer* AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, const ConstTensor& weights, const Optional& biases, @@ -331,9 +339,8 @@ public: /// @param convolution2dDescriptor - Description of the 2D depthwise convolution layer. /// @param name - Optional name for the layer. /// @return - Interface for configuring the layer. - IConnectableLayer* AddDepthwiseConvolution2dLayer( - const DepthwiseConvolution2dDescriptor& convolution2dDescriptor, - const char* name = nullptr); + IConnectableLayer* AddDepthwiseConvolution2dLayer(const DepthwiseConvolution2dDescriptor& convolution2dDescriptor, + const char* name = nullptr); /// Adds a 2D depthwise convolution layer to the network. /// @param convolution2dDescriptor - Description of the 2D depthwise convolution layer. diff --git a/include/armnn/LayerVisitorBase.hpp b/include/armnn/LayerVisitorBase.hpp index acc8aa00b3..025fca7eb0 100644 --- a/include/armnn/LayerVisitorBase.hpp +++ b/include/armnn/LayerVisitorBase.hpp @@ -72,6 +72,10 @@ public: const Optional&, const char*) override { DefaultPolicy::Apply(__func__); } + void VisitConvolution2dLayer(const IConnectableLayer*, + const Convolution2dDescriptor&, + const char*) override { DefaultPolicy::Apply(__func__); } + void VisitDepthToSpaceLayer(const IConnectableLayer*, const DepthToSpaceDescriptor&, const char*) override { DefaultPolicy::Apply(__func__); } diff --git a/src/armnn/BackendHelper.cpp b/src/armnn/BackendHelper.cpp index fde979ba41..2d70d7add0 100644 --- a/src/armnn/BackendHelper.cpp +++ b/src/armnn/BackendHelper.cpp @@ -373,6 +373,31 @@ bool LayerSupportHandle::IsConvolution2dSupported(const TensorInfo& input, TensorInfo biasesVal = biases.has_value() ? biases.value() : TensorInfo(); TensorInfos infos{input, output, weights, biasesVal}; + Optional capability ; + if(!m_BackendId.IsUndefined()) + { + capability = GetCapability("ConstantTensorsAsInputs", m_BackendId); + if(!capability.has_value() || capability.value().GetValue().AsBool() == false) + { + if(!weights.IsConstant()) + { + return false; + } + if (descriptor.m_BiasEnabled && !biases.has_value()) + { + return false; + } + + + // At the first stage we will only print a warning. this is to give + // backend developers a chance to adopt and read weights from input slots. + ARMNN_LOG(warning) << "The backend makes use of a deprecated interface to read constant tensors. " + "If you are a backend developer please find more information in our " + "doxygen documentation on github https://github.com/ARM-software/armnn " + "under the keyword 'ConstTensorsAsInputs'."; + } + } + return m_LayerSupport->IsLayerSupported(LayerType::Convolution2d, infos, descriptor, diff --git a/src/armnn/Descriptors.cpp b/src/armnn/Descriptors.cpp index d67d4404e0..4eb875e03d 100644 --- a/src/armnn/Descriptors.cpp +++ b/src/armnn/Descriptors.cpp @@ -425,16 +425,10 @@ int StridedSliceDescriptor::GetStopForAxis(const TensorShape& inputShape, } -uint32_t FullyConnectedDescriptor::GetNumViews() const +uint32_t GetNumInputs(bool biasEnabled) { - return GetNumInputs(); -} - -uint32_t FullyConnectedDescriptor::GetNumInputs() const -{ - // Return 2 otherwise check if bias is enabled unsigned int numInputs = 2; - if (m_BiasEnabled) + if (biasEnabled) { numInputs = 3; } @@ -443,24 +437,27 @@ uint32_t FullyConnectedDescriptor::GetNumInputs() const uint32_t Convolution3dDescriptor::GetNumInputs() const { - // Return 2 otherwise check if bias is enabled - unsigned int numInputs = 2; - if (m_BiasEnabled) - { - numInputs = 3; - } - return numInputs; + return armnn::GetNumInputs(m_BiasEnabled); +} + +uint32_t Convolution2dDescriptor::GetNumInputs() const +{ + return armnn::GetNumInputs(m_BiasEnabled); +} + +uint32_t FullyConnectedDescriptor::GetNumInputs() const +{ + return armnn::GetNumInputs(m_BiasEnabled); +} + +uint32_t FullyConnectedDescriptor::GetNumViews() const +{ + return armnn::GetNumInputs(m_BiasEnabled); } uint32_t DepthwiseConvolution2dDescriptor::GetNumInputs() const { - // Return 2 otherwise check if bias is enabled - unsigned int numInputs = 2; - if (m_BiasEnabled) - { - numInputs = 3; - } - return numInputs; + return armnn::GetNumInputs(m_BiasEnabled); } } diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp index c1cec482b6..8500e529b0 100644 --- a/src/armnn/Graph.cpp +++ b/src/armnn/Graph.cpp @@ -603,16 +603,19 @@ void Graph::ConstructErrorMessageForUnconnectedInputs(Layer* const layer, bool noWeightsAndBias = false; if ((layer->GetType() == armnn::LayerType::FullyConnected || + layer->GetType() == armnn::LayerType::Convolution2d || layer->GetType() == armnn::LayerType::Convolution3d || layer->GetType() == armnn::LayerType::DepthwiseConvolution2d) && slotIndex > 0) { + message << std::endl; + // If weights are not set and is bias enabled, also check if bias is set if (slotIndex == 1 && layer->GetNumInputSlots() == 3) { const IOutputSlot* biasSource = layer->GetInputSlot(2).GetConnectedOutputSlot(); if (biasSource == NULL) { - message << layer->GetName() << " layer weights and bias not set: "; + message << "Weights and bias layers not set." << std::endl; noWeightsAndBias = true; } } @@ -622,11 +625,11 @@ void Graph::ConstructErrorMessageForUnconnectedInputs(Layer* const layer, { if (slotIndex == 1) { - message << layer->GetName() << " layer weights not set: "; + message << "Weights layer not set." << std::endl; } else { - message << layer->GetName() << " layer bias not set: "; + message << "Bias layer not set." << std::endl; } } } @@ -634,9 +637,10 @@ void Graph::ConstructErrorMessageForUnconnectedInputs(Layer* const layer, std::string slotString = noWeightsAndBias ? "1 & 2" : std::to_string(slotIndex); message << "Input slot(s) " << slotString - << " not connected to an output slot on " + << " for " << GetLayerTypeAsCString(layer->GetType()) - << " layer " + << " not connected to an output slot. " << std::endl + << "Layer name: " << std::quoted(layer->GetName()); throw LayerValidationException(message.str()); } diff --git a/src/armnn/Layer.cpp b/src/armnn/Layer.cpp index a31119b395..3241b5024e 100644 --- a/src/armnn/Layer.cpp +++ b/src/armnn/Layer.cpp @@ -23,16 +23,23 @@ namespace armnn // Instantiate the static member variable NullDescriptor Layer::m_NullDescriptor; -template -void AssertMultipleInputSlots(Layer& layer) +void AssertNumberOfInputSlots(Layer& layer) { - if(PolymorphicDowncast(&(layer.GetParameters()))->m_BiasEnabled) + switch (layer.GetType()) { - ARMNN_ASSERT(layer.GetNumInputSlots() == 3); - } - else - { - ARMNN_ASSERT(layer.GetNumInputSlots() == 2); + case LayerType::Convolution2d: + case LayerType::DepthwiseConvolution2d: + case LayerType::FullyConnected: + { + ARMNN_ASSERT(layer.GetNumInputSlots() == 2 || + layer.GetNumInputSlots() == 3); + break; + } + default: + { + ARMNN_ASSERT(layer.GetNumInputSlots() == 1); + break; + } } } @@ -47,19 +54,7 @@ void InputSlot::Insert(Layer& layer) // Disconnects parent from this. prevSlot->Disconnect(*this); - switch (layer.GetType()) - { - case LayerType::DepthwiseConvolution2d: - { - AssertMultipleInputSlots(layer); - break; - } - default: - { - ARMNN_ASSERT(layer.GetNumInputSlots() == 1); - break; - } - } + AssertNumberOfInputSlots(layer); // Connects inserted layer to parent. int idx = prevSlot->Connect(layer.GetInputSlot(0)); diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp index a88fa5ab9c..228927db57 100644 --- a/src/armnn/LoadedNetwork.cpp +++ b/src/armnn/LoadedNetwork.cpp @@ -330,10 +330,10 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr net, if (layer->GetType() == LayerType::Constant) { + // Place the Constant Workloads into a queue so that they can be executed first ConstWorkloads.push_back(m_WorkloadQueue.back().get()); } } - // release the constant data in the layer.. layer->ReleaseConstantData(); break; @@ -513,10 +513,7 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr net, AllocateAndExecuteConstantWorkloadsAsync(); } } - - // If synchronous, execute all constant layer workloads as the FoldPad optimization - // may have created a new conv2d layer prior to the input constant layers which will - // cause a failure if constant workloads are not executed + // If synchronous, execute all constant layer workloads if (!networkProperties.m_AsyncEnabled) { for (auto workload: ConstWorkloads) diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index d2ebd4cde6..479e57fc56 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -83,35 +83,23 @@ IConnectableLayer* INetwork::AddConcatLayer(const ConcatDescriptor& concatDescri IConnectableLayer* INetwork::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, - const ConstTensor& weights, - const Optional& biases, const char* name) { - return pNetworkImpl->AddConvolution2dLayer(convolution2dDescriptor, weights, biases, name); + return pNetworkImpl->AddConvolution2dLayer(convolution2dDescriptor, name); } - -IConnectableLayer* INetwork::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, - const ConstTensor& weights, - const char* name) -{ - Optional biases; - return pNetworkImpl->AddConvolution2dLayer(convolution2dDescriptor, weights, biases, name); -} - - +ARMNN_NO_DEPRECATE_WARN_BEGIN IConnectableLayer* INetwork::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, - const ConstTensor& weights, - const ConstTensor& biases, - const char* name ) + const ConstTensor& weights, + const Optional& biases, + const char* name) { - return pNetworkImpl->AddConvolution2dLayer(convolution2dDescriptor, weights, armnn::Optional(biases), name); } - +ARMNN_NO_DEPRECATE_WARN_END IConnectableLayer* INetwork::AddConvolution3dLayer(const Convolution3dDescriptor& convolution3dDescriptor, const char* name) @@ -2012,25 +2000,33 @@ IConnectableLayer* NetworkImpl::AddConcatLayer(const ConcatDescriptor& concatDes return m_Graph->AddLayer(concatDescriptor, name); } -IConnectableLayer* NetworkImpl::AddConvolution2dLayerImpl(const Convolution2dDescriptor& convolution2dDescriptor, - const ConstTensor& weights, - const Optional& biases, - const char* name) +IConnectableLayer* NetworkImpl::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, + const char* name) { - if (convolution2dDescriptor.m_BiasEnabled && !biases.has_value()) - { - throw InvalidArgumentException("AddConvolution2dLayer: biases cannot be empty"); - } - - const auto layer = m_Graph->AddLayer(convolution2dDescriptor, name); + return m_Graph->AddLayer(convolution2dDescriptor, name); +} +IConnectableLayer* NetworkImpl::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, + const ConstTensor& weights, + const Optional& biases, + const char* name) +{ + auto layer = m_Graph->AddLayer(convolution2dDescriptor, name); + // Add a constant layer for weights + ConstantLayer* weightsLayer = m_Graph->AddLayer("Weights"); + weightsLayer->m_LayerOutput = std::make_shared(weights); layer->m_Weight = std::make_shared(weights); - - if (convolution2dDescriptor.m_BiasEnabled) + weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsLayer->m_LayerOutput->GetTensorInfo()); + weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); + // Add a constant layer for biases + if (biases.has_value() && convolution2dDescriptor.m_BiasEnabled) { + ConstantLayer* biasLayer = m_Graph->AddLayer("Bias"); + biasLayer->m_LayerOutput = std::make_shared(biases.value()); layer->m_Bias = std::make_shared(biases.value()); + biasLayer->GetOutputSlot(0).SetTensorInfo(biasLayer->m_LayerOutput->GetTensorInfo()); + biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2)); } - return layer; } @@ -2044,31 +2040,6 @@ IConnectableLayer* NetworkImpl::AddConvertFp32ToFp16Layer(const char* name) return m_Graph->AddLayer(name); } -IConnectableLayer* NetworkImpl::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, - const ConstTensor& weights, - const Optional& biases, - const char* name) -{ - return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name); -} - -IConnectableLayer* NetworkImpl::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, - const ConstTensor& weights, - const char* name) -{ - Optional biases; - return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name); -} - -IConnectableLayer* NetworkImpl::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, - const ConstTensor& weights, - const ConstTensor& biases, - const char* name) -{ - Optional optionalBiases(biases); - return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, optionalBiases, name); -} - IConnectableLayer* NetworkImpl::AddConvolution3dLayer(const Convolution3dDescriptor& convolution3dDescriptor, const char* name) { diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp index c5ed8de50d..c2be600d05 100644 --- a/src/armnn/Network.hpp +++ b/src/armnn/Network.hpp @@ -70,6 +70,10 @@ public: IConnectableLayer* AddConcatLayer(const ConcatDescriptor& concatDescriptor, const char* name = nullptr); + IConnectableLayer* AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, + const char* name = nullptr); + + ARMNN_DEPRECATED_MSG_REMOVAL_DATE("This AddConvolution2dLayer overload is deprecated", "22.11") IConnectableLayer* AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, const ConstTensor& weights, const Optional& biases, @@ -256,10 +260,6 @@ public: void ExecuteStrategy(IStrategy& strategy) const; private: - IConnectableLayer* AddConvolution2dLayerImpl(const Convolution2dDescriptor& convolution2dDescriptor, - const ConstTensor& weights, - const Optional& biases, - const char* name); bool GetShapeInferenceMethod(); NetworkOptions m_NetworkOptions; diff --git a/src/armnn/NetworkUtils.cpp b/src/armnn/NetworkUtils.cpp index 666ce3d069..7597798fa4 100644 --- a/src/armnn/NetworkUtils.cpp +++ b/src/armnn/NetworkUtils.cpp @@ -98,6 +98,15 @@ std::vector InsertConvertFp32ToBf16LayersBefore(Graph& for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot) { bool allowInsert = true; + + if ((layer.GetType() == LayerType::Convolution2d || + layer.GetType() == LayerType::FullyConnected || + layer.GetType() == LayerType::DepthwiseConvolution2d) + && inputSlot->GetSlotIndex() == 2) + { + // Refrain from reducing bias to Bf16 + continue; + } if (expectCorrectInputType) { // Only insert ConvertFp32ToBf16Layer before FP32 input slots diff --git a/src/armnn/Tensor.cpp b/src/armnn/Tensor.cpp index 6a4dbf8dae..ab4ecc9194 100644 --- a/src/armnn/Tensor.cpp +++ b/src/armnn/Tensor.cpp @@ -362,9 +362,7 @@ TensorInfo::TensorInfo(unsigned int numDimensions, float quantizationScale, int32_t quantizationOffset, bool isConstant) - : m_Shape(numDimensions, dimensionSizes) - , m_DataType(dataType) - , m_IsConstant(isConstant) + : m_Shape(numDimensions, dimensionSizes), m_DataType(dataType), m_IsConstant(isConstant) { SetQuantizationScale(quantizationScale); SetQuantizationOffset(quantizationOffset); diff --git a/src/armnn/layers/Convolution2dLayer.cpp b/src/armnn/layers/Convolution2dLayer.cpp index ef5db8e9b9..7b3382bf93 100644 --- a/src/armnn/layers/Convolution2dLayer.cpp +++ b/src/armnn/layers/Convolution2dLayer.cpp @@ -21,7 +21,7 @@ namespace armnn { Convolution2dLayer::Convolution2dLayer(const Convolution2dDescriptor& param, const char* name) - : LayerWithParameters(1, 1, LayerType::Convolution2d, param, name) + : LayerWithParameters(param.GetNumInputs(), 1, LayerType::Convolution2d, param, name) { } @@ -32,7 +32,7 @@ void Convolution2dLayer::SerializeLayerParameters(ParameterStringifyFunction& fn const std::vector& inputShapes = { GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape(), - m_Weight->GetTensorInfo().GetShape() + GetInputSlot(1).GetConnection()->GetTensorInfo().GetShape() }; const TensorShape filterShape = inputShapes[1]; DataLayoutIndexed dataLayoutIndex(m_Param.m_DataLayout); @@ -49,15 +49,14 @@ void Convolution2dLayer::SerializeLayerParameters(ParameterStringifyFunction& fn std::unique_ptr Convolution2dLayer::CreateWorkload(const IWorkloadFactory& factory) const { // on this level constant data should not be released.. - ARMNN_ASSERT_MSG(m_Weight != nullptr, "Convolution2dLayer: Weights data should not be null."); ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Convolution2dLayer_CreateWorkload"); Convolution2dQueueDescriptor descriptor; - - descriptor.m_Weight = m_Weight.get(); - - if (m_Param.m_BiasEnabled) + if (m_Weight) + { + descriptor.m_Weight = m_Weight.get(); + } + if (m_Param.m_BiasEnabled && m_Bias) { - ARMNN_ASSERT_MSG(m_Bias != nullptr, "Convolution2dLayer: Bias data should not be null."); descriptor.m_Bias = m_Bias.get(); } @@ -120,18 +119,18 @@ std::vector Convolution2dLayer::InferOutputShapes(const std::vector void Convolution2dLayer::ValidateTensorShapesFromInputs() { - VerifyLayerConnections(1, CHECK_LOCATION()); + VerifyLayerConnections(m_Param.GetNumInputs(), CHECK_LOCATION()); const TensorShape& outputShape = GetOutputSlot(0).GetTensorInfo().GetShape(); VerifyShapeInferenceType(outputShape, m_ShapeInferenceMethod); - // check if we m_Weight data is not nullptr - ARMNN_ASSERT_MSG(m_Weight != nullptr, "Convolution2dLayer: Weights data should not be null."); + ARMNN_ASSERT_MSG(GetInputSlot(1).GetConnection(), + "Convolution2dLayer: Weights should be connected to input slot 1."); - auto inferredShapes = InferOutputShapes({ - GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape(), - m_Weight->GetTensorInfo().GetShape() }); + std::vector inferredShapes = InferOutputShapes({ + GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape(), + GetInputSlot(1).GetConnection()->GetTensorInfo().GetShape() }); ARMNN_ASSERT(inferredShapes.size() == 1); @@ -147,33 +146,13 @@ Layer::ConstantTensors Convolution2dLayer::GetConstantTensorsByRef() ARMNN_NO_DEPRECATE_WARN_BEGIN void Convolution2dLayer::Accept(ILayerVisitor& visitor) const { - ManagedConstTensorHandle managedWeight(m_Weight); - ConstTensor weightsTensor(managedWeight.GetTensorInfo(), managedWeight.Map()); - - Optional optionalBiasTensor = EmptyOptional(); - ManagedConstTensorHandle managedBias(m_Bias); - if (GetParameters().m_BiasEnabled) - { - ConstTensor biasTensor(managedBias.GetTensorInfo(), managedBias.Map()); - optionalBiasTensor = Optional(biasTensor); - } - - visitor.VisitConvolution2dLayer(this, GetParameters(), weightsTensor, optionalBiasTensor, GetName()); + visitor.VisitConvolution2dLayer(this, GetParameters(), GetName()); } ARMNN_NO_DEPRECATE_WARN_END void Convolution2dLayer::ExecuteStrategy(IStrategy& strategy) const { - ManagedConstTensorHandle managedWeight(m_Weight); - std::vector constTensors { { managedWeight.GetTensorInfo(), managedWeight.Map() } }; - - ManagedConstTensorHandle managedBias(m_Bias); - if (GetParameters().m_BiasEnabled) - { - constTensors.emplace_back(ConstTensor(managedBias.GetTensorInfo(), managedBias.Map())); - } - - strategy.ExecuteStrategy(this, GetParameters(), constTensors, GetName()); + strategy.ExecuteStrategy(this, GetParameters(), { }, GetName()); } } // namespace armnn diff --git a/src/armnn/layers/Convolution2dLayer.hpp b/src/armnn/layers/Convolution2dLayer.hpp index 844747831c..6bb86da18e 100644 --- a/src/armnn/layers/Convolution2dLayer.hpp +++ b/src/armnn/layers/Convolution2dLayer.hpp @@ -17,8 +17,10 @@ class Convolution2dLayer : public LayerWithParameters public: /// A unique pointer to store Weight values. + /// @Note: Deprecated. Removal date is 22.11. Weights are stored in ConstantLayers now. std::shared_ptr m_Weight; /// A unique pointer to store Bias values. + /// @Note: Deprecated. Removal date is 22.11. Bias are stored in ConstantLayers now. std::shared_ptr m_Bias; /// Makes a workload for the Convolution2d type. @@ -59,8 +61,8 @@ protected: /// Default destructor ~Convolution2dLayer() = default; - /// Retrieve the handles to the constant values stored by the layer. - /// @return A vector of the constant tensors stored by this layer. + /// @Note Deprecated. GetConstantTensorsByRef is deprecated. m_Weights and m_Bias + /// should be connected to layer as Constant Layers instead." ConstantTensors GetConstantTensorsByRef() override; }; diff --git a/src/armnn/optimizations/FoldPadIntoLayer2d.hpp b/src/armnn/optimizations/FoldPadIntoLayer2d.hpp index bbaabb815e..eb6bc90afd 100644 --- a/src/armnn/optimizations/FoldPadIntoLayer2d.hpp +++ b/src/armnn/optimizations/FoldPadIntoLayer2d.hpp @@ -146,8 +146,22 @@ Layer2dT* FoldPadIntoLayer2dImpl(Graph& graph, InputSlot& connection) const std::string name = std::string("folded-") + padLayer.GetName() + "-into-" + layer2d.GetName(); auto& newLayer2d = *graph.InsertNewLayer(padLayer.GetInputSlot(0), newLayer2dDescriptor, name.c_str()); - // Reconnect the pad layer with its original parent. newLayer2d.GetOutputSlot().MoveAllConnections(parentSlot); + // Start at 1 to connect only weights and bias + for (unsigned int i = 1; i < layer2d.GetNumInputSlots(); ++i) + { + if (layer2d.GetInputSlot(i).GetConnectedOutputSlot() != nullptr) + { + Layer& tgtLayer = layer2d.GetInputSlot(i).GetConnectedOutputSlot()->GetOwningLayer(); + // Ensure we are definitely connecting the necessary constant layers + if (tgtLayer.GetType() == armnn::LayerType::Constant) + { + // Remove old connection and connect to new layer2d + tgtLayer.GetOutputSlot(0).Disconnect(layer2d.GetInputSlot(i)); + tgtLayer.GetOutputSlot(0).Connect(newLayer2d.GetInputSlot(i)); + } + } + } // Moves connections in old layer2d layer output to new layer. // Old layer2d layer will be removed as it's left unconnected. @@ -168,14 +182,19 @@ public: { const auto conv2dLayer = PolymorphicDowncast(&connection.GetOwningLayer()); // Copy weights and bias to the new convolution layer - ARMNN_ASSERT_MSG(conv2dLayer->m_Weight != nullptr, - "FoldPadIntoConvolution2d: Weights data should not be null."); + ARMNN_ASSERT_MSG(newConv2dLayer->GetInputSlot(1).GetConnection() != nullptr, + "FoldPadIntoConvolution2d: New convolution layer is missing connection to weights layer"); + + // Deprecated 22.11 newConv2dLayer->m_Weight = std::move(conv2dLayer->m_Weight); if (conv2dLayer->GetParameters().m_BiasEnabled) { - ARMNN_ASSERT_MSG(conv2dLayer->m_Bias != nullptr, - "FoldPadIntoConvolution2d: Bias data should not be null if bias is enabled."); + ARMNN_ASSERT_MSG(newConv2dLayer->GetInputSlot(2).GetConnection() != nullptr, + "FoldPadIntoConvolution2d: New convolution layer is missing " + "connection to bias layer."); + + // Deprecated 22.11 newConv2dLayer->m_Bias = std::move(conv2dLayer->m_Bias); } } @@ -191,26 +210,25 @@ class FoldPadIntoDepthwiseConvolution2dImpl public: void Run(Graph& graph, InputSlot& connection) const { - const auto newLayer2d = FoldPadIntoLayer2dImpl(graph, connection); + const auto newConv2dLayer = FoldPadIntoLayer2dImpl(graph, connection); - if (newLayer2d != nullptr) + if (newConv2dLayer != nullptr) { - const auto layer2d = PolymorphicDowncast(&connection.GetOwningLayer()); + const auto conv2dLayer = PolymorphicDowncast(&connection.GetOwningLayer()); + // Copy weights and bias to the new convolution layer + ARMNN_ASSERT_MSG(newConv2dLayer->GetInputSlot(1).GetConnection() != nullptr, + "FoldPadIntoDepthwiseConvolution2d: New convolution layer is missing connection to weights layer"); - // Move weights and bias layer connections to the new convolution layer - ARMNN_ASSERT_MSG(layer2d->GetInputSlot(1).GetConnection() != nullptr, - "FoldPadIntoDepthwiseConvolution2d: Weights data should not be null."); - Layer& weightLayer = layer2d->GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer(); - weightLayer.GetOutputSlot(0).Disconnect(layer2d->GetInputSlot(1)); - weightLayer.GetOutputSlot(0).Connect(newLayer2d->GetInputSlot(1)); + // Deprecated 22.11 + newConv2dLayer->m_Weight = std::move(conv2dLayer->m_Weight); - if (layer2d->GetParameters().m_BiasEnabled) + if (conv2dLayer->GetParameters().m_BiasEnabled) { - ARMNN_ASSERT_MSG(layer2d->GetInputSlot(2).GetConnection() != nullptr, - "FoldPadIntoDepthwiseConvolution2d: Bias data should not be null if bias is enabled."); - Layer& biasLayer = layer2d->GetInputSlot(2).GetConnectedOutputSlot()->GetOwningLayer(); - biasLayer.GetOutputSlot(0).Disconnect(layer2d->GetInputSlot(2)); - biasLayer.GetOutputSlot(0).Connect(newLayer2d->GetInputSlot(2)); + ARMNN_ASSERT_MSG(newConv2dLayer->GetInputSlot(2).GetConnection() != nullptr, + "FoldPadIntoConvolution2d: New convolution layer is missing " + "connection to bias layer."); + // Deprecated 22.11 + newConv2dLayer->m_Bias = std::move(conv2dLayer->m_Bias); } } } diff --git a/src/armnn/optimizations/FuseBatchNorm.hpp b/src/armnn/optimizations/FuseBatchNorm.hpp index 6a50fc4a0c..bca0c7d00a 100644 --- a/src/armnn/optimizations/FuseBatchNorm.hpp +++ b/src/armnn/optimizations/FuseBatchNorm.hpp @@ -14,8 +14,8 @@ namespace armnn namespace optimizations { -template > +template> class FuseBatchNorm { public: @@ -26,7 +26,7 @@ public: /// combined with the parameters of the child BatchNorm layer. void Run(Graph& graph, InputSlot& connection) const { - Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer(); + Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer(); Layer& child = connection.GetOwningLayer(); bool depthwise = (base.GetType() == LayerType::DepthwiseConvolution2d); @@ -37,7 +37,7 @@ public: if (base.GetDataType() == ArmnnType && child.GetDataType() == ArmnnType) { OutputSlot* parentOut = base.GetInputSlot(0).GetConnectedOutputSlot(); - auto convLayer = PolymorphicDowncast(&base); + auto convLayer = PolymorphicDowncast(&base); auto batchNormLayer = PolymorphicDowncast(&child); // Read convolution and batch norm parameters @@ -50,25 +50,16 @@ public: ConstTensor meanTensor(batchNormLayer->m_Mean->GetTensorInfo(), batchNormLayer->m_Mean->Map(true)); ConstTensor varTensor(batchNormLayer->m_Variance->GetTensorInfo(), batchNormLayer->m_Variance->Map(true)); - auto convDescriptor = convLayer->GetParameters(); + auto convDescriptor = convLayer->GetParameters(); ConstTensor weightsTensor; - if (convLayer->GetNumInputSlots() > 1) - { - ARMNN_ASSERT_MSG(convLayer->GetInputSlots()[1].GetConnection() != nullptr, - "FuseBatchNorm: Weight data should not be null."); - InputSlot & oldSlotWeights = const_cast(convLayer->GetInputSlots()[1]); - OutputSlot & constantSlotWeights = const_cast(*oldSlotWeights.GetConnectedOutputSlot()); - ConstantLayer* weightLayer = PolymorphicDowncast( - &constantSlotWeights.GetOwningLayer()); - weightsTensor = ConstTensor(weightLayer->m_LayerOutput->GetTensorInfo(), - weightLayer->m_LayerOutput->Map(true)); - } - else - { - ARMNN_ASSERT_MSG(convLayer->m_Weight != nullptr, - "FuseBatchNorm: Bias data should not be null if bias is enabled."); - weightsTensor = ConstTensor(convLayer->m_Weight->GetTensorInfo(), convLayer->m_Weight->Map(true)); - } + ARMNN_ASSERT_MSG(convLayer->GetInputSlots()[1].GetConnection() != nullptr, + "FuseBatchNorm: Weight data should not be null."); + + ConstantLayer* weightLayer = PolymorphicDowncast( + &base.GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer()); + + weightsTensor = ConstTensor(weightLayer->m_LayerOutput->GetTensorInfo(), + weightLayer->m_LayerOutput->Map(true)); armnnUtils::DataLayoutIndexed dataLayout(convDescriptor.m_DataLayout); auto weightsShape = weightsTensor.GetInfo().GetShape(); @@ -76,9 +67,9 @@ public: const unsigned int depthMultiplier = depthwise ? weightsShape[3] / inputChannels : 1; const unsigned int outputChannels = depthwise ? weightsShape[3] : weightsShape[0]; const unsigned int weightsHeight = depthwise ? weightsShape[1] : - weightsShape[dataLayout.GetHeightIndex()]; + weightsShape[dataLayout.GetHeightIndex()]; const unsigned int weightsWidth = depthwise ? weightsShape[2] : - weightsShape[dataLayout.GetWidthIndex()]; + weightsShape[dataLayout.GetWidthIndex()]; const auto* weightsBuffer = static_cast(weightsTensor.GetMemoryArea()); const auto* betaBuffer = static_cast(betaTensor.GetMemoryArea()); @@ -99,7 +90,7 @@ public: { for (unsigned int cOut = 0; cOut < outputChannels; ++cOut) { - T mult = gammaVector[cOut] / static_cast(sqrtf (varianceVector[cOut] + epsilon)); + T mult = gammaVector[cOut] / static_cast(sqrtf(varianceVector[cOut] + epsilon)); for (unsigned int h = 0; h < weightsHeight; ++h) { @@ -140,23 +131,14 @@ public: if (biasWasEnabledBeforeOpt) { ConstTensor biasTensor; - if (convLayer->GetNumInputSlots() > 1) - { - ARMNN_ASSERT_MSG(convLayer->GetInputSlots()[2].GetConnection() != nullptr, - "FuseBatchNorm: Bias data should not be null if bias is enabled."); - InputSlot & oldSlotBias = const_cast(convLayer->GetInputSlots()[2]); - OutputSlot & constantSlotBias = const_cast(*oldSlotBias.GetConnectedOutputSlot()); - ConstantLayer* biasLayer = PolymorphicDowncast( - &constantSlotBias.GetOwningLayer()); - biasTensor = ConstTensor(biasLayer->m_LayerOutput->GetTensorInfo(), - biasLayer->m_LayerOutput->Map(true)); - } - else - { - ARMNN_ASSERT_MSG(convLayer->m_Bias != nullptr, - "FuseBatchNorm: Bias data should not be null if bias is enabled."); - biasTensor = ConstTensor(convLayer->m_Bias->GetTensorInfo(), convLayer->m_Bias->Map(true)); - } + ARMNN_ASSERT_MSG(convLayer->GetInputSlots()[2].GetConnection() != nullptr, + "FuseBatchNorm: Bias data should not be null if bias is enabled."); + + ConstantLayer* biasLayer = PolymorphicDowncast( + &base.GetInputSlot(2).GetConnectedOutputSlot()->GetOwningLayer()); + + biasTensor = ConstTensor(biasLayer->m_LayerOutput->GetTensorInfo(), + biasLayer->m_LayerOutput->Map(true)); const auto* biasBuffer = static_cast(biasTensor.GetMemoryArea()); std::vector biasVector(biasBuffer, biasBuffer + biasTensor.GetNumElements()); @@ -192,8 +174,6 @@ public: // This optimization will always have 3 input slots on the Conv2d base layer if (newConv2dLayer.GetNumInputSlots() > 1) { - ConstantLayer* weightLayer = PolymorphicDowncast( - &base.GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer()); // Remove old connection and connect to new layer2d weightLayer->GetOutputSlot(0).Disconnect(base.GetInputSlot(1)); weightLayer->GetOutputSlot(0).Connect(newConv2dLayer.GetInputSlot(1)); diff --git a/src/armnn/optimizations/RedirectMembersToConstantInputs.hpp b/src/armnn/optimizations/RedirectMembersToConstantInputs.hpp index cb97a0fe32..483377452e 100644 --- a/src/armnn/optimizations/RedirectMembersToConstantInputs.hpp +++ b/src/armnn/optimizations/RedirectMembersToConstantInputs.hpp @@ -29,6 +29,7 @@ public: case LayerType::BatchNormalization: break; case LayerType::Convolution2d: + RedirectWeightsAndBiases(&layer); break; case LayerType::DepthwiseConvolution2d: RedirectWeightsAndBiases(&layer); diff --git a/src/armnn/test/ConstTensorLayerVisitor.cpp b/src/armnn/test/ConstTensorLayerVisitor.cpp index af0581ce4c..701327b120 100644 --- a/src/armnn/test/ConstTensorLayerVisitor.cpp +++ b/src/armnn/test/ConstTensorLayerVisitor.cpp @@ -119,16 +119,22 @@ TEST_CASE("CheckConvolution2dLayer") descriptor.m_StrideX = 2; descriptor.m_StrideY = 3; descriptor.m_DataLayout = DataLayout::NHWC; + descriptor.m_BiasEnabled = false; std::vector data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}; std::vector dimensions = {1, 1, 3, 3}; ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data); - TestConvolution2dLayerVisitor visitor(descriptor, weights, EmptyOptional()); + TestConstantLayerVisitor weightsVisitor(weights); + TestConvolution2dLayerVisitor visitor(descriptor); NetworkImpl net; - IConnectableLayer* const layer = net.AddConvolution2dLayer(descriptor, weights, EmptyOptional()); + IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights); + IConnectableLayer* const layer = net.AddConvolution2dLayer(descriptor); + weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); + + weightsLayer->ExecuteStrategy(weightsVisitor); layer->ExecuteStrategy(visitor); } @@ -148,11 +154,17 @@ TEST_CASE("CheckNamedConvolution2dLayer") std::vector dimensions = {1, 1, 3, 3}; ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data); - TestConvolution2dLayerVisitor visitor(descriptor, weights, EmptyOptional(), layerName); + TestConstantLayerVisitor weightsVisitor(weights); + TestConvolution2dLayerVisitor visitor(descriptor, layerName); NetworkImpl net; - IConnectableLayer* const layer = net.AddConvolution2dLayer(descriptor, weights, EmptyOptional(), layerName); + IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights); + IConnectableLayer* const layer = net.AddConvolution2dLayer(descriptor, layerName); + + weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); + + weightsLayer->ExecuteStrategy(weightsVisitor); layer->ExecuteStrategy(visitor); } @@ -175,13 +187,21 @@ TEST_CASE("CheckConvolution2dLayerWithBiases") std::vector biasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}; std::vector biasDimensions = {1, 1, 3, 3}; ConstTensor biases(TensorInfo(4, biasDimensions.data(), DataType::Float32, 0.0f, 0, true), biasData); - Optional optionalBiases(biases); - TestConvolution2dLayerVisitor visitor(descriptor, weights, optionalBiases); + TestConstantLayerVisitor weightsVisitor(weights); + TestConstantLayerVisitor biasVisitor(biases); + TestConvolution2dLayerVisitor visitor(descriptor); NetworkImpl net; + IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights); + IConnectableLayer* const biasLayer = net.AddConstantLayer(biases); + IConnectableLayer* const layer = net.AddConvolution2dLayer(descriptor); - IConnectableLayer* const layer = net.AddConvolution2dLayer(descriptor, weights, optionalBiases); + weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); + biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2)); + + biasLayer->ExecuteStrategy(biasVisitor); + weightsLayer->ExecuteStrategy(weightsVisitor); layer->ExecuteStrategy(visitor); } @@ -205,13 +225,21 @@ TEST_CASE("CheckNamedConvolution2dLayerWithBiases") std::vector biasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}; std::vector biasDimensions = {1, 1, 3, 3}; ConstTensor biases(TensorInfo(4, biasDimensions.data(), DataType::Float32, 0.0f, 0, true), biasData); - Optional optionalBiases(biases); - TestConvolution2dLayerVisitor visitor(descriptor, weights, optionalBiases, layerName); + TestConstantLayerVisitor weightsVisitor(weights); + TestConstantLayerVisitor biasVisitor(biases); + TestConvolution2dLayerVisitor visitor(descriptor, layerName); NetworkImpl net; + IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights); + IConnectableLayer* const biasLayer = net.AddConstantLayer(biases); + IConnectableLayer* const layer = net.AddConvolution2dLayer(descriptor, layerName); - IConnectableLayer* const layer = net.AddConvolution2dLayer(descriptor, weights, optionalBiases, layerName); + weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); + biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2)); + + biasLayer->ExecuteStrategy(biasVisitor); + weightsLayer->ExecuteStrategy(weightsVisitor); layer->ExecuteStrategy(visitor); } diff --git a/src/armnn/test/ConstTensorLayerVisitor.hpp b/src/armnn/test/ConstTensorLayerVisitor.hpp index 00d17b4ae8..1f1b3f5262 100644 --- a/src/armnn/test/ConstTensorLayerVisitor.hpp +++ b/src/armnn/test/ConstTensorLayerVisitor.hpp @@ -21,22 +21,18 @@ class TestConvolution2dLayerVisitor : public TestLayerVisitor { public: explicit TestConvolution2dLayerVisitor(const Convolution2dDescriptor& convolution2dDescriptor, - const ConstTensor& weights, - const Optional& biases, const char* name = nullptr) : TestLayerVisitor(name) , m_Descriptor(convolution2dDescriptor) - , m_Weights(weights) - , m_Biases(biases) {} virtual ~TestConvolution2dLayerVisitor() {} void ExecuteStrategy(const armnn::IConnectableLayer* layer, - const armnn::BaseDescriptor& descriptor, - const std::vector& constants, - const char* name, - const armnn::LayerBindingId id = 0) override + const armnn::BaseDescriptor& descriptor, + const std::vector& constants, + const char* name, + const armnn::LayerBindingId id = 0) override { armnn::IgnoreUnused(descriptor, constants, id); switch (layer->GetType()) @@ -46,12 +42,6 @@ public: CheckLayerPointer(layer); CheckLayerName(name); CheckDescriptor(static_cast(descriptor)); - CheckConstTensors(m_Weights, constants[0]); - if (m_Biases.has_value()) - { - CHECK(constants.size() == 2); - CheckConstTensors(m_Biases.value(), constants[1]); - } break; } default: @@ -66,8 +56,6 @@ protected: private: Convolution2dDescriptor m_Descriptor; - ConstTensor m_Weights; - Optional m_Biases; }; class TestDepthwiseConvolution2dLayerVisitor : public TestLayerVisitor diff --git a/src/armnn/test/NetworkTests.cpp b/src/armnn/test/NetworkTests.cpp index c64c0a0d40..7756f40623 100644 --- a/src/armnn/test/NetworkTests.cpp +++ b/src/armnn/test/NetworkTests.cpp @@ -77,18 +77,18 @@ TEST_CASE("NetworkModification") armnn::ConstTensor weights(armnn::TensorInfo(4, dims, armnn::DataType::Float32, 0.0f, 0, true), convWeightsData); armnn::Convolution2dDescriptor convDesc2d; - armnn::IConnectableLayer* const convLayer = net.AddConvolution2dLayer(convDesc2d, - weights, - armnn::EmptyOptional(), - "conv layer"); + armnn::IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights, "conv const weights"); + armnn::IConnectableLayer* const convLayer = net.AddConvolution2dLayer(convDesc2d, "conv layer"); CHECK(convLayer); + CHECK(weightsLayer); inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0)); + weightsLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(1)); armnn::FullyConnectedDescriptor fullyConnectedDesc; // Constant layer that now holds weights data for FullyConnected - armnn::IConnectableLayer* const constantWeightsLayer = net.AddConstantLayer(weights, "const weights"); + armnn::IConnectableLayer* const constantWeightsLayer = net.AddConstantLayer(weights, "fc const weights"); armnn::IConnectableLayer* const fullyConnectedLayer = net.AddFullyConnectedLayer(fullyConnectedDesc, "fully connected"); CHECK(constantWeightsLayer); @@ -155,12 +155,13 @@ TEST_CASE("NetworkModification") multiplicationLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); //Tests that all layers are present in the graph. - CHECK(net.GetGraph().GetNumLayers() == 12); + CHECK(net.GetGraph().GetNumLayers() == 13); //Tests that the vertices exist and have correct names. CHECK(GraphHasNamedLayer(net.GetGraph(), "input layer")); CHECK(GraphHasNamedLayer(net.GetGraph(), "conv layer")); - CHECK(GraphHasNamedLayer(net.GetGraph(), "const weights")); + CHECK(GraphHasNamedLayer(net.GetGraph(), "conv const weights")); + CHECK(GraphHasNamedLayer(net.GetGraph(), "fc const weights")); CHECK(GraphHasNamedLayer(net.GetGraph(), "fully connected")); CHECK(GraphHasNamedLayer(net.GetGraph(), "pooling2d")); CHECK(GraphHasNamedLayer(net.GetGraph(), "activation")); @@ -239,8 +240,8 @@ TEST_CASE("NetworkModification") CHECK(AreAllLayerInputSlotsConnected(*outputLayer)); // Checks connectivity. - checkOneOutputToOneInputConnection(inputLayer, convLayer, 0); - checkOneOutputToTwoInputConnectionForTwoDifferentLayers(convLayer, constantWeightsLayer, fullyConnectedLayer, 1, 0); + checkOneOutputToTwoInputConnectionForTwoDifferentLayers(inputLayer, weightsLayer, convLayer, 0, 0); + checkOneOutputToTwoInputConnectionForTwoDifferentLayers(convLayer, constantWeightsLayer, fullyConnectedLayer, 2, 0); checkOneOutputToOneInputConnection(fullyConnectedLayer, poolingLayer, 2, 1); checkOneOutputToOneInputConnection(poolingLayer, activationLayer); checkOneOutputToOneInputConnection(activationLayer, normalizationLayer); @@ -619,10 +620,12 @@ TEST_CASE("ObtainConv2DDescriptorFromIConnectableLayer") convDesc2d.m_DilationY = 3; convDesc2d.m_BiasEnabled = false; convDesc2d.m_DataLayout = armnn::DataLayout::NCHW; + ARMNN_NO_DEPRECATE_WARN_BEGIN armnn::IConnectableLayer* const convLayer = net.AddConvolution2dLayer(convDesc2d, weights, armnn::EmptyOptional(), "conv layer"); + ARMNN_NO_DEPRECATE_WARN_END CHECK(convLayer); const armnn::BaseDescriptor& descriptor = convLayer->GetParameters(); diff --git a/src/armnn/test/OptimizerTests.cpp b/src/armnn/test/OptimizerTests.cpp index 6a13dc6456..3dd55279c6 100644 --- a/src/armnn/test/OptimizerTests.cpp +++ b/src/armnn/test/OptimizerTests.cpp @@ -441,6 +441,11 @@ void CreateConvolution2dGraph(Graph &graph, const unsigned int* inputShape, Layer* input = graph.AddLayer(0, "input"); input->GetOutputSlot().SetTensorInfo(inputInfo); + ConstantLayer* weightsLayer = nullptr; + weightsLayer = graph.AddLayer("Weights"); + weightsLayer->m_LayerOutput = std::make_shared(weights); + weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsLayer->m_LayerOutput->GetTensorInfo()); + Convolution2dLayer* layer = graph.AddLayer(desc, "conv2d"); layer->m_Weight = std::make_unique(weights); layer->GetOutputSlot().SetTensorInfo(outputInfo); @@ -448,6 +453,7 @@ void CreateConvolution2dGraph(Graph &graph, const unsigned int* inputShape, Layer* output = graph.AddLayer(0, "output"); input->GetOutputSlot().Connect(layer->GetInputSlot(0)); layer->GetOutputSlot().Connect(output->GetInputSlot(0)); + weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); } TEST_CASE("Conv2dValidateTensorShapesFromInputs") @@ -875,40 +881,70 @@ TEST_CASE("OptimizeForExclusiveConnectionsFuseTest") ConstTensor mean(TensorInfo(1, outputChannelSize, DataType::Float32, 0.0f, 0, true), meanVector); ConstTensor variance(TensorInfo(1, outputChannelSize, DataType::Float32, 0.0f, 0, true), varianceVector); + ConstantLayer* biasLayer = nullptr; + // Define the network Graph graph; auto input = graph.AddLayer(0, "input"); + auto weightsLayer = graph.AddLayer("Weights"); auto conv = graph.AddLayer(convolution2dDescriptor, "convolution"); auto batchNorm = graph.AddLayer(batchNormDescriptor, "batchNorm"); auto output = graph.AddLayer(0, "output"); // Set layer information input->GetOutputSlot().SetTensorInfo(inputInfo); + + weightsLayer->m_LayerOutput = std::make_shared(weights); + weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsLayer->m_LayerOutput->GetTensorInfo()); conv->GetOutputSlot().SetTensorInfo(outputInfo); + batchNorm->GetOutputSlot().SetTensorInfo(outputInfo); - conv->m_Weight = std::make_unique(weights); batchNorm->m_Beta = std::make_unique(beta); batchNorm->m_Gamma = std::make_unique(gamma); batchNorm->m_Mean = std::make_unique(mean); batchNorm->m_Variance = std::make_unique(variance); + if (convolution2dDescriptor.m_BiasEnabled) { std::vector biasVector = { 11 }; ConstTensor bias(TensorInfo(1, outputChannelSize, DataType::Float32, 0.0f, 0, true), biasVector); - conv->m_Bias = std::make_unique(bias); + biasLayer =graph.AddLayer("Bias"); + biasLayer->m_LayerOutput = std::make_shared(bias); + biasLayer->GetOutputSlot(0).SetTensorInfo(biasLayer->m_LayerOutput->GetTensorInfo()); + biasLayer->GetOutputSlot(0).Connect(conv->GetInputSlot(2)); + conv->m_Bias = biasLayer->m_LayerOutput; } // Connect layers input->GetOutputSlot(0).Connect(conv->GetInputSlot(0)); + weightsLayer->GetOutputSlot(0).Connect(conv->GetInputSlot(1)); conv->GetOutputSlot(0).Connect(batchNorm->GetInputSlot(0)); batchNorm->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - CHECK(4 == graph.GetNumLayers()); - CHECK(CheckSequence(graph.cbegin(), graph.cend(), - &IsLayerOfType, - &IsLayerOfType, - &IsLayerOfType, - &IsLayerOfType)); + // Temporary workaround to ensure the descriptor weights are populated + conv->m_Weight = weightsLayer->m_LayerOutput; + + if (convolution2dDescriptor.m_BiasEnabled) + { + CHECK(6 == graph.GetNumLayers()); + CHECK(CheckSequence(graph.cbegin(), graph.cend(), + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType)); + } + else + { + CHECK(5 == graph.GetNumLayers()); + CHECK(CheckSequence(graph.cbegin(), graph.cend(), + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType)); + } // Optimize graph armnn::Optimizer::Pass(graph, MakeOptimizations(FuseBatchNormIntoConvolution2DFloat32())); @@ -918,11 +954,13 @@ TEST_CASE("OptimizeForExclusiveConnectionsFuseTest") (layer->GetNameStr() == "fused-batchNorm-into-convolution"); }; - CHECK(3 == graph.GetNumLayers()); + CHECK(5 == graph.GetNumLayers()); CHECK(CheckSequence(graph.cbegin(), graph.cend(), - &IsLayerOfType, - checkFusedConv2d, - &IsLayerOfType)); + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + checkFusedConv2d, + &IsLayerOfType)); } // Tests that OptimizeForExclusiveConnections works, not fusing when not needed, using BatchNorm fusing as example diff --git a/src/armnn/test/ShapeInferenceTests.cpp b/src/armnn/test/ShapeInferenceTests.cpp index d45c9900c0..a3800ade09 100644 --- a/src/armnn/test/ShapeInferenceTests.cpp +++ b/src/armnn/test/ShapeInferenceTests.cpp @@ -275,8 +275,6 @@ TEST_CASE("Convolution2dTest") { const TensorShape inputShape{1, 1, 10, 10}; - Graph graph; - Convolution2dDescriptor descriptor; descriptor.m_PadLeft = 0; @@ -288,16 +286,9 @@ TEST_CASE("Convolution2dTest") descriptor.m_DilationX = 3; descriptor.m_DilationY = 3; - auto layer = BuildGraph(&graph, - {inputShape}, - descriptor, - "conv2d"); - - const float Datum = 0.0f; - ConstTensor weights({{1, 1, 3, 3}, DataType::Float32, 0.0f, 0, true}, &Datum); - layer->m_Weight = std::make_unique(weights); - - RunShapeInferenceTest(layer, {{ 1, 1, 4, 4 }}); + CreateGraphAndRunTest({ inputShape, { 1, 1, 3, 3 } }, + { { 1, 1, 4, 4 } }, descriptor, + "convd"); } TEST_CASE("DebugLayerTest") diff --git a/src/armnn/test/SubgraphViewTests.cpp b/src/armnn/test/SubgraphViewTests.cpp index 048c4f51fd..d7465c8361 100644 --- a/src/armnn/test/SubgraphViewTests.cpp +++ b/src/armnn/test/SubgraphViewTests.cpp @@ -42,28 +42,44 @@ bool AreAnySubgraphLayersPresentInGraph(const SubgraphView::IConnectableLayers & // // this helper only works if all layers where the inputs connect to are not selected // -SubgraphView::InputSlots CreateInputsFrom(const std::vector& layers) +SubgraphView::InputSlots CreateInputsFrom(const std::vector& layers, + std::vector ignoreSlots = {}) { SubgraphView::InputSlots result; for (auto&& layer : layers) { for (auto&& it = layer->BeginInputSlots(); it != layer->EndInputSlots(); ++it) { - result.push_back(&(*it)); + if (std::find(ignoreSlots.begin(), ignoreSlots.end(), it->GetSlotIndex()) != ignoreSlots.end()) + { + continue; + } + else + { + result.push_back(&(*it)); + } } } return result; } /// Duplication for IConnectableLayer -SubgraphView::IInputSlots CreateIInputsFrom(const std::vector& layers) +SubgraphView::IInputSlots CreateIInputsFrom(const std::vector& layers, + std::vector ignoreSlots = {}) { SubgraphView::IInputSlots result; - for (auto&& layer : layers) + for (auto&& layer: layers) { - for (unsigned int i = 0 ; i < layer->GetNumInputSlots(); ++i) + for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i) { - result.push_back(&(layer->GetInputSlot(i))); + if (std::find(ignoreSlots.begin(), ignoreSlots.end(), i) != ignoreSlots.end()) + { + continue; + } + else + { + result.push_back(&(layer->GetInputSlot(i))); + } } } return result; @@ -241,7 +257,7 @@ TEST_CASE("SubgraphViewSlots") // Construct sub-graph SubgraphViewSelector::SubgraphViewPtr subgraph = CreateSubgraphViewFrom({}, - CreateIInputsFrom({convLayer1}), + CreateIInputsFrom({convLayer1}, {1, 2}), CreateIOutputsFrom({convLayer2})); // Test that both old and new are initialized @@ -327,17 +343,20 @@ TEST_CASE("SingleInputSingleOutput") Convolution2dDescriptor convDescriptor; Layer* const convLayer1 = graph.AddLayer(convDescriptor, "conv1"); Layer* const convLayer2 = graph.AddLayer(convDescriptor, "conv2"); - + Layer* const weightsLayer1 = graph.AddLayer("weights1"); + Layer* const weightsLayer2 = graph.AddLayer("weights2"); Layer* const outputLayer = graph.AddLayer(0, "output"); inputLayer->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(0)); + weightsLayer1->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(1)); convLayer1->GetOutputSlot(0).Connect(convLayer2->GetInputSlot(0)); + weightsLayer2->GetOutputSlot(0).Connect(convLayer2->GetInputSlot(1)); convLayer2->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); // Construct sub-graph SubgraphViewSelector::SubgraphViewPtr subgraph = CreateSubgraphViewFrom({}, - CreateIInputsFrom({convLayer1}), + CreateIInputsFrom({convLayer1}, {1}), CreateIOutputsFrom({convLayer2})); // Save sub-graph connections for comparison after substitution @@ -377,7 +396,7 @@ TEST_CASE("SingleInputSingleOutputAddPrecompiledLayerSubstituteSubgraph1") convLayer2->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); // Construct sub-graph - SubgraphViewSelector::SubgraphViewPtr subgraph = CreateSubgraphViewFrom(CreateInputsFrom({convLayer1}), + SubgraphViewSelector::SubgraphViewPtr subgraph = CreateSubgraphViewFrom(CreateInputsFrom({convLayer1}, {1}), CreateOutputsFrom({convLayer2}), {}); @@ -421,7 +440,7 @@ TEST_CASE("SingleInputSingleOutputAddPrecompiledLayerSubstituteSubgraph2") convLayer2->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); // Construct sub-graph - SubgraphViewSelector::SubgraphViewPtr subgraph = CreateSubgraphViewFrom(CreateInputsFrom({convLayer1}), + SubgraphViewSelector::SubgraphViewPtr subgraph = CreateSubgraphViewFrom(CreateInputsFrom({convLayer1}, {1}), CreateOutputsFrom({convLayer2}), {}); @@ -467,7 +486,7 @@ TEST_CASE("SingleInputSingleOutputSubstituteGraph") // Construct sub-graph SubgraphViewSelector::SubgraphViewPtr subgraph = - CreateSubgraphViewFrom(CreateInputsFrom({convLayer1}), + CreateSubgraphViewFrom(CreateInputsFrom({convLayer1}, {1}), CreateOutputsFrom({convLayer2}), {}); @@ -519,7 +538,7 @@ TEST_CASE("MultiInputSingleOutput") concatLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); // Construct sub-graph - auto subgraph = CreateSubgraphViewFrom(CreateInputsFrom({convLayer1, convLayer2}), + auto subgraph = CreateSubgraphViewFrom(CreateInputsFrom({convLayer1, convLayer2}, {1}), CreateOutputsFrom({concatLayer}), {}); @@ -621,7 +640,7 @@ TEST_CASE("MultiInputMultiOutput") // Construct sub-graph SubgraphViewSelector::SubgraphViewPtr subgraph = - CreateSubgraphViewFrom(CreateInputsFrom({convLayer1, convLayer2}), + CreateSubgraphViewFrom(CreateInputsFrom({convLayer1, convLayer2}, {1}), CreateOutputsFrom({convLayer1, convLayer2}), {}); @@ -942,7 +961,8 @@ TEST_CASE("MultipleSimpleSubgraphs") // This test case represents the scenario when we have two distinct subgraphs // in a simple linear network. The selected nodes are the M* and the // non-selected ones are the X* - // + // W2 ->-> + // | // X1 -> M1 -> M2 -> X2 -> M3 -> X3 // // The expected results is two subgraphs, one with {M1, M2} and another one @@ -952,12 +972,17 @@ TEST_CASE("MultipleSimpleSubgraphs") // the graph is constructed in reverse order auto x3 = graph.AddLayer(0, "output"); + auto m3 = graph.InsertNewLayer(x3->GetInputSlot(0), ActivationDescriptor{}, "m3"); + auto x2 = graph.InsertNewLayer(m3->GetInputSlot(0), - Convolution2dDescriptor{}, - "x2"); + Convolution2dDescriptor{}, + "x2"); + + auto w2 = graph.InsertNewLayer(x2->GetInputSlot(1), "w2"); + auto m2 = graph.InsertNewLayer(x2->GetInputSlot(0), ActivationDescriptor{}, "m2"); @@ -966,6 +991,7 @@ TEST_CASE("MultipleSimpleSubgraphs") "m1"); graph.InsertNewLayer(m1->GetInputSlot(0), 0, "x1"); + IgnoreUnused(w2); // All selected 'M*' layers will be of Activation type SubgraphViewSelector::Subgraphs subgraphs = SubgraphViewSelector::SelectSubgraphs( @@ -1636,10 +1662,17 @@ TEST_CASE("SingleSubgraph") Layer* const convLayer2 = graph.AddLayer(convDescriptor, "conv2"); convLayer2->SetBackendId(Compute::GpuAcc); + Layer* const weights1 = graph.AddLayer("weights1"); + weights1->SetBackendId(Compute::GpuAcc); + Layer* const weights2 = graph.AddLayer("weights2"); + weights2->SetBackendId(Compute::GpuAcc); + Layer* const outputLayer = graph.AddLayer(0, "output"); inputLayer->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(0)); + weights1->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(1)); convLayer1->GetOutputSlot(0).Connect(convLayer2->GetInputSlot(0)); + weights2->GetOutputSlot(0).Connect(convLayer2->GetInputSlot(1)); convLayer2->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); // GpuAcc sub graph selector @@ -1702,6 +1735,9 @@ TEST_CASE("MultipleSubgraphs") Layer* const convLayer1 = graph.AddLayer(convDescriptor, "conv1"); Layer* const convLayer2 = graph.AddLayer(convDescriptor, "conv2"); + Layer* const weights1 = graph.AddLayer("weights1"); + Layer* const weights2 = graph.AddLayer("weights2"); + OriginsDescriptor concatDescriptor(2); Layer* const pConcatLayer = graph.AddLayer(concatDescriptor, "concat"); pConcatLayer->SetBackendId(Compute::CpuAcc); @@ -1711,7 +1747,9 @@ TEST_CASE("MultipleSubgraphs") inputLayer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0)); splitterLayer->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(0)); splitterLayer->GetOutputSlot(1).Connect(convLayer2->GetInputSlot(0)); + weights1->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(1)); convLayer1->GetOutputSlot(0).Connect(pConcatLayer->GetInputSlot(0)); + weights2->GetOutputSlot(0).Connect(convLayer2->GetInputSlot(1)); convLayer2->GetOutputSlot(0).Connect(pConcatLayer->GetInputSlot(1)); pConcatLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); diff --git a/src/armnn/test/optimizations/FoldPadTests.cpp b/src/armnn/test/optimizations/FoldPadTests.cpp index 9919c6d0e6..027b10377d 100644 --- a/src/armnn/test/optimizations/FoldPadTests.cpp +++ b/src/armnn/test/optimizations/FoldPadTests.cpp @@ -47,6 +47,12 @@ TEST_CASE("FoldPadLayerIntoConvolution2dLayer") std::vector weightsVector(18); ConstTensor weights(TensorInfo(4, weightsShape, DataType::Float32, 0.0f, 0, true), weightsVector); + ConstantLayer* weightsLayer = graph.AddLayer("Weights"); + weightsLayer->m_LayerOutput = std::make_shared(weights); + + TensorInfo weightsInfo = weightsLayer->m_LayerOutput->GetTensorInfo(); + weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo); + Convolution2dLayer* conv2dLayer = graph.AddLayer(convolution2dDescriptor, "conv2d"); conv2dLayer->m_Weight = std::make_unique(weights); conv2dLayer->GetOutputSlot().SetTensorInfo(outputInfo); @@ -56,6 +62,7 @@ TEST_CASE("FoldPadLayerIntoConvolution2dLayer") // Connect up layers - input -> pad -> conv2d -> output input->GetOutputSlot().Connect(padLayer->GetInputSlot(0)); padLayer->GetOutputSlot().Connect(conv2dLayer->GetInputSlot(0)); + weightsLayer->GetOutputSlot().Connect(conv2dLayer->GetInputSlot(1)); conv2dLayer->GetOutputSlot().Connect(output->GetInputSlot(0)); auto checkSimpleConv2d = [](const Layer* const layer)->bool { @@ -69,10 +76,11 @@ TEST_CASE("FoldPadLayerIntoConvolution2dLayer") }; CHECK(CheckSequence(graph.cbegin(), graph.cend(), - &IsLayerOfType, - &IsLayerOfType, - checkSimpleConv2d, - &IsLayerOfType)); + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + checkSimpleConv2d, + &IsLayerOfType)); armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(FoldPadIntoConvolution2d())); @@ -87,9 +95,10 @@ TEST_CASE("FoldPadLayerIntoConvolution2dLayer") }; CHECK(CheckSequence(graph.cbegin(), graph.cend(), - &IsLayerOfType, - checkPadFoldedIntoConv2d, - &IsLayerOfType)); + &IsLayerOfType, + checkPadFoldedIntoConv2d, + &IsLayerOfType, + &IsLayerOfType)); } TEST_CASE("FoldPadLayerIntoDepthwiseConvolution2dLayer") @@ -628,12 +637,12 @@ TEST_CASE("FoldPadLayerIntoConv2dLayer_ExecuteInferenceWithAndWithoutOptimizatio TensorInfo biasInfo({4}, DataType::Float32, 0.0f, 0, true); ConstTensor bias(biasInfo, biasVector); Optional optionalBias = Optional(bias); - + ARMNN_NO_DEPRECATE_WARN_BEGIN IConnectableLayer* conv2dLayer = network->AddConvolution2dLayer(convDescriptor, weights, optionalBias, "Conv2D"); - + ARMNN_NO_DEPRECATE_WARN_END TensorInfo outputInfo(4, outputShape, DataType::Float32); conv2dLayer->GetOutputSlot(0).SetTensorInfo(outputInfo); diff --git a/src/armnn/test/optimizations/FuseActivationTests.cpp b/src/armnn/test/optimizations/FuseActivationTests.cpp index e5f54208f0..0cca86f93b 100644 --- a/src/armnn/test/optimizations/FuseActivationTests.cpp +++ b/src/armnn/test/optimizations/FuseActivationTests.cpp @@ -42,7 +42,7 @@ struct Convolution2dTest { using LayerType = Convolution2dLayer; static const bool isElementWise = false; - static const bool isConstTensorAsInputSupported = false; + static const bool isConstTensorAsInputSupported = true; static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin static TensorShape GetOutputShape() { return TensorShape( {1, 3, 3, 4}); } // NHWCout @@ -69,8 +69,9 @@ struct Convolution2dTest TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset, true); ConstTensor weights(weightsInfo, weightsVector); Optional optionalBias; - + ARMNN_NO_DEPRECATE_WARN_BEGIN return network->AddConvolution2dLayer(descriptor, weights, optionalBias, name); + ARMNN_NO_DEPRECATE_WARN_END } static std::vector AddConstantLayers(INetwork* network, diff --git a/src/armnn/test/optimizations/FuseBatchNormTests.cpp b/src/armnn/test/optimizations/FuseBatchNormTests.cpp index b28bb17773..4a94f7889b 100644 --- a/src/armnn/test/optimizations/FuseBatchNormTests.cpp +++ b/src/armnn/test/optimizations/FuseBatchNormTests.cpp @@ -24,7 +24,6 @@ class Conv2dTest public: using ConvDescriptorType = armnn::Convolution2dDescriptor; using ConvLayerType = armnn::Convolution2dLayer; - static const bool isConstTensorAsInputSupported = false; static IConnectableLayer *AddConvolution(INetwork *network, const Convolution2dDescriptor &descriptor, @@ -32,7 +31,9 @@ public: const Optional &biases, const char *name) { + ARMNN_NO_DEPRECATE_WARN_BEGIN return network->AddConvolution2dLayer(descriptor, weights, biases, name); + ARMNN_NO_DEPRECATE_WARN_END } static std::vector AddConstantLayers(INetwork *network, @@ -54,13 +55,12 @@ class DepthwiseConv2dTest public: using ConvDescriptorType = armnn::DepthwiseConvolution2dDescriptor; using ConvLayerType = armnn::DepthwiseConvolution2dLayer; - static const bool isConstTensorAsInputSupported = true; - static IConnectableLayer *AddConvolution(INetwork *network, - const DepthwiseConvolution2dDescriptor &descriptor, - const ConstTensor &weights, - const Optional &biases, - const char *name) + static IConnectableLayer* AddConvolution(INetwork* network, + const DepthwiseConvolution2dDescriptor& descriptor, + const ConstTensor& weights, + const Optional& biases, + const char* name) { IgnoreUnused(weights); IgnoreUnused(biases); @@ -183,19 +183,15 @@ INetworkPtr CreateNetwork(bool depthwise, bool preventFusing) output2Layer = network->AddOutputLayer(1); } - // If ConstTensorAsInputs is supported weights and bias are stored as constant layers. - if (Conv2dTest::isConstTensorAsInputSupported) - { - std::vector constantLayers = Conv2dTest::AddConstantLayers(network.get(), - convolution2dDescriptor, - weights, - Optional()); + std::vector constantLayers = Conv2dTest::AddConstantLayers(network.get(), + convolution2dDescriptor, + weights, + Optional()); - // Connect constant layers to receiverLayer. - for (unsigned int i = 0; i < constantLayers.size(); ++i) - { - constantLayers[i]->GetOutputSlot(0).Connect(convLayer->GetInputSlot(i + 1)); - } + // Connect constant layers to receiverLayer. + for (unsigned int i = 0; i < constantLayers.size(); ++i) + { + constantLayers[i]->GetOutputSlot(0).Connect(convLayer->GetInputSlot(i + 1)); } // Set layer information @@ -241,26 +237,14 @@ void FuseBatchNormIntoConvTest(bool depthwise, float tolerance, armnn::Compute b (layer->GetNameStr() == "fused-batchNorm-into-convolution"); }; - if (Conv2dTest::isConstTensorAsInputSupported) - { - CHECK(5 == graphFused.GetNumLayers()); - CHECK(CheckSequence(graphFused.cbegin(), - graphFused.cend(), - &IsLayerOfType, - &IsLayerOfType, - &IsLayerOfType, - checkFusedConv2d, - &IsLayerOfType)); - } - else - { - CHECK(3 == graphFused.GetNumLayers()); - CHECK(CheckSequence(graphFused.cbegin(), - graphFused.cend(), - &IsLayerOfType, - checkFusedConv2d, - &IsLayerOfType)); - } + CHECK(5 == graphFused.GetNumLayers()); + CHECK(CheckSequence(graphFused.cbegin(), + graphFused.cend(), + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + checkFusedConv2d, + &IsLayerOfType)); // Load network into runtime NetworkId networkIdentifier; @@ -278,10 +262,10 @@ void FuseBatchNormIntoConvTest(bool depthwise, float tolerance, armnn::Compute b TensorInfo inputTensorInfo = run->GetInputTensorInfo(networkIdentifier, 0); inputTensorInfo.SetConstant(true); - InputTensors inputTensorsFused { + InputTensors inputTensorsFused { {0, ConstTensor(inputTensorInfo, inputDataFused.data())}}; OutputTensors outputTensorsFused{ - {0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}}; + {0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}}; // Execute network run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused); @@ -294,33 +278,19 @@ void FuseBatchNormIntoConvTest(bool depthwise, float tolerance, armnn::Compute b IRuntimePtr runNotFused = IRuntime::Create(IRuntime::CreationOptions()); // default options // Optimise ArmNN network - IOptimizedNetworkPtr optNetNotFused = Optimize(*networkNotFused, {backendId}, runNotFused->GetDeviceSpec()); + IOptimizedNetworkPtr optNetNotFused = Optimize(*networkNotFused, { backendId }, runNotFused->GetDeviceSpec()); Graph& graphNotFused = GetGraphForTesting(optNetNotFused.get()); - if (Conv2dTest::isConstTensorAsInputSupported) - { - CHECK(6 == graphNotFused.GetNumLayers()); - CHECK(CheckSequence(graphNotFused.cbegin(), - graphNotFused.cend(), - &IsLayerOfType, - &IsLayerOfType, - &IsLayerOfType, - &IsLayerOfType, - &IsLayerOfType, - &IsLayerOfType)); - } - else - { - CHECK(5 == graphNotFused.GetNumLayers()); - CHECK(CheckSequence(graphNotFused.cbegin(), - graphNotFused.cend(), - &IsLayerOfType, - &IsLayerOfType, - &IsLayerOfType, - &IsLayerOfType, - &IsLayerOfType)); - } + CHECK(6 == graphNotFused.GetNumLayers()); + CHECK(CheckSequence(graphNotFused.cbegin(), + graphNotFused.cend(), + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType)); // Load network into runtime NetworkId networkIdentifierNotFused; @@ -341,10 +311,10 @@ void FuseBatchNormIntoConvTest(bool depthwise, float tolerance, armnn::Compute b TensorInfo inputTensorInfo2 = runNotFused->GetInputTensorInfo(networkIdentifierNotFused, 0); inputTensorInfo2.SetConstant(true); InputTensors inputTensorsNotFused{ - {0, ConstTensor(inputTensorInfo2, inputDataNotFused.data())}}; + { 0, ConstTensor(inputTensorInfo2, inputDataNotFused.data()) } }; OutputTensors outputTensorsNotFused{ - {0, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 0), outputDataNotFused.data())}, - {1, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 1), outputData2NotFused.data())}}; + { 0, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 0), outputDataNotFused.data()) }, + { 1, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 1), outputData2NotFused.data()) } }; // Execute network runNotFused->EnqueueWorkload(networkIdentifierNotFused, inputTensorsNotFused, outputTensorsNotFused); diff --git a/src/armnnDeserializer/Deserializer.cpp b/src/armnnDeserializer/Deserializer.cpp index 704b6c35c1..04dde73b20 100644 --- a/src/armnnDeserializer/Deserializer.cpp +++ b/src/armnnDeserializer/Deserializer.cpp @@ -1423,44 +1423,69 @@ void IDeserializer::DeserializerImpl::ParseConvolution2d(GraphPtr graph, unsigne CHECK_LAYERS(graph, 0, layerIndex); auto inputs = GetInputs(graph, layerIndex); CHECK_LOCATION(); - CHECK_VALID_SIZE(inputs.size(), 1); auto outputs = GetOutputs(graph, layerIndex); CHECK_VALID_SIZE(outputs.size(), 1); - auto serializerLayer = graph->layers()->Get(layerIndex)->layer_as_Convolution2dLayer(); + auto flatBufferLayer = graph->layers()->Get(layerIndex)->layer_as_Convolution2dLayer(); + auto layerName = GetLayerName(graph, layerIndex); - auto serializerDescriptor = serializerLayer->descriptor(); + auto flatbufferDescriptor = flatBufferLayer->descriptor(); armnn::Convolution2dDescriptor descriptor; - descriptor.m_PadLeft = serializerDescriptor->padLeft(); - descriptor.m_PadRight = serializerDescriptor->padRight(); - descriptor.m_PadTop = serializerDescriptor->padTop(); - descriptor.m_PadBottom = serializerDescriptor->padBottom(); - descriptor.m_StrideX = serializerDescriptor->strideX(); - descriptor.m_StrideY = serializerDescriptor->strideY();; - descriptor.m_DilationX = serializerDescriptor->dilationX(); - descriptor.m_DilationY = serializerDescriptor->dilationY();; - descriptor.m_BiasEnabled = serializerDescriptor->biasEnabled();; - descriptor.m_DataLayout = ToDataLayout(serializerDescriptor->dataLayout()); + descriptor.m_PadLeft = flatbufferDescriptor->padLeft(); + descriptor.m_PadRight = flatbufferDescriptor->padRight(); + descriptor.m_PadTop = flatbufferDescriptor->padTop(); + descriptor.m_PadBottom = flatbufferDescriptor->padBottom(); + descriptor.m_StrideX = flatbufferDescriptor->strideX(); + descriptor.m_StrideY = flatbufferDescriptor->strideY();; + descriptor.m_DilationX = flatbufferDescriptor->dilationX(); + descriptor.m_DilationY = flatbufferDescriptor->dilationY();; + descriptor.m_BiasEnabled = flatbufferDescriptor->biasEnabled();; + descriptor.m_DataLayout = ToDataLayout(flatbufferDescriptor->dataLayout()); - armnn::ConstTensor weights = ToConstTensor(serializerLayer->weights()); - armnn::ConstTensor biases; + armnn::IConnectableLayer* layer; + std::vector ignoreSlots {}; - armnn::Optional optionalBiases = armnn::EmptyOptional(); - if (descriptor.m_BiasEnabled) + armnn::ConstTensor biasTensor; + // Weights and biases used to be always constant and were stored as members of the layer. This has changed and + // they are now passed as inputs. If they are constant then they will be stored in a ConstantLayer. + if (this->GetFeatureVersions(graph).m_ConstTensorsAsInputs <= 0) + { + // If the model stores weights and biases as members of the layer we have to read them from there + // but add them to their own ConstantLayer for compatibility + CHECK_VALID_SIZE(inputs.size(), 1); + + layer = m_Network->AddConvolution2dLayer(descriptor, + layerName.c_str()); + + armnn::ConstTensor weightsTensor = ToConstTensor(flatBufferLayer->weights()); + auto weightsLayer = m_Network->AddConstantLayer(weightsTensor); + weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1u)); + weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsTensor.GetInfo()); + ignoreSlots.emplace_back(1u); + + if (descriptor.m_BiasEnabled) + { + biasTensor = ToConstTensor(flatBufferLayer->biases()); + auto biasLayer = m_Network->AddConstantLayer(biasTensor); + biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2u)); + biasLayer->GetOutputSlot(0).SetTensorInfo(biasTensor.GetInfo()); + ignoreSlots.emplace_back(2u); + } + } + else { - biases = ToConstTensor(serializerLayer->biases()); - optionalBiases = armnn::Optional(biases); + layer = m_Network->AddConvolution2dLayer(descriptor, + layerName.c_str()); + uint32_t numInputs = descriptor.GetNumInputs(); + CHECK_VALID_SIZE(inputs.size(), numInputs); } - IConnectableLayer* layer = m_Network->AddConvolution2dLayer(descriptor, - weights, - optionalBiases, - layerName.c_str()); + armnn::TensorInfo outputTensorInfo = ToTensorInfo(outputs[0]); layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); - RegisterInputSlots(graph, layerIndex, layer); + RegisterInputSlots(graph, layerIndex, layer, ignoreSlots); RegisterOutputSlots(graph, layerIndex, layer); } diff --git a/src/armnnDeserializer/test/DeserializeConvolution2d.cpp b/src/armnnDeserializer/test/DeserializeConvolution2d.cpp index 6461250570..e099597845 100644 --- a/src/armnnDeserializer/test/DeserializeConvolution2d.cpp +++ b/src/armnnDeserializer/test/DeserializeConvolution2d.cpp @@ -121,6 +121,171 @@ struct Convolution2dFixture : public ParserFlatbuffersSerializeFixture } }; +struct Convolution2dFixtureConstantAsInput : public ParserFlatbuffersSerializeFixture +{ + explicit Convolution2dFixtureConstantAsInput(const std::string & inputShape1, + const std::string & outputShape, + const std::string & weightsShape, + const std::string & dataType) + { + m_JsonString = R"( + { + inputIds: [0], + outputIds: [3], + layers: [{ + layer_type: "InputLayer", + layer: { + base: { + layerBindingId: 0, + base: { + index: 0, + layerName: "InputLayer", + layerType: "Input", + inputSlots: [{ + index: 0, + connection: {sourceLayerIndex:0, outputSlotIndex:0 }, + }], + outputSlots: [{ + index: 0, + tensorInfo: { + dimensions: )" + inputShape1 + R"(, + dataType: )" + dataType + R"(, + quantizationScale: 0.5, + quantizationOffset: 0 + }, + }] + }, + } + }, + }, + { + layer_type: "Convolution2dLayer", + layer : { + base: { + index:1, + layerName: "Convolution2dLayer", + layerType: "Convolution2d", + inputSlots: [ + { + index: 0, + connection: {sourceLayerIndex:0, outputSlotIndex:0 }, + }, + { + index: 1, + connection: { + sourceLayerIndex: 2, + outputSlotIndex: 0 + } + } + ], + outputSlots: [ + { + index: 0, + tensorInfo: { + dimensions: )" + outputShape + R"(, + dataType: )" + dataType + R"( + }, + } + ], + }, + descriptor: { + padLeft: 1, + padRight: 1, + padTop: 1, + padBottom: 1, + strideX: 2, + strideY: 2, + biasEnabled: false, + dataLayout: NHWC + } + } + }, + { + layer_type: "ConstantLayer", + layer: { + base: { + index: 2, + layerName: "Weights", + layerType: "Constant", + inputSlots: [ + + ], + outputSlots: [ + { + index: 0, + tensorInfo: { + dimensions: )" + weightsShape + R"(, + dataType: )" + dataType + R"(, + quantizationScale: 0.1, + dimensionSpecificity: [ + true, + true, + true, + true + ] + } + } + ] + }, + input: { + info: { + dimensions: )" + weightsShape + R"(, + dataType: )" + dataType + R"(, + quantizationScale: 0.1, + dimensionSpecificity: [ + true, + true, + true, + true + ] + }, + data_type: "IntData", + data: { + data: [ + 1082130432, 1084227584, 1086324736, + 0 ,0 ,0 , + 1077936128, 1073741824, 1065353216 + ] + } + } + } + }, + { + layer_type: "OutputLayer", + layer: { + base:{ + layerBindingId: 0, + base: { + index: 3, + layerName: "OutputLayer", + layerType: "Output", + inputSlots: [{ + index: 0, + "connection": { + "sourceLayerIndex": 1, + "outputSlotIndex": 0 + } + }], + outputSlots: [ { + index: 0, + tensorInfo: { + dimensions: )" + outputShape + R"(, + dataType: )" + dataType + R"( + }, + }], + } + }}, + }], + "featureVersions": { + "constantTensorsAsInputs": 1, + "weightsLayoutScheme": 1 + } + } + )"; + Setup(); + } +}; + struct SimpleConvolution2dFixture : Convolution2dFixture { SimpleConvolution2dFixture() : Convolution2dFixture("[ 1, 5, 5, 1 ]", @@ -137,4 +302,21 @@ TEST_CASE_FIXTURE(SimpleConvolution2dFixture, "Convolution2dFloat32") {{"OutputLayer", {23, 33, 24, 91, 99, 48, 26, 50, 19}}}); } + +struct SimpleConvolution2dFixtureConstantAsInput : Convolution2dFixtureConstantAsInput +{ + SimpleConvolution2dFixtureConstantAsInput() : Convolution2dFixtureConstantAsInput("[ 1, 5, 5, 1 ]", + "[ 1, 3, 3, 1 ]", + "[ 1, 3, 3, 1 ]", + "Float32") {} +}; + +TEST_CASE_FIXTURE(SimpleConvolution2dFixtureConstantAsInput, "Convolution2dFloat32ConstAsInput") +{ + RunTest<4, armnn::DataType::Float32>( + 0, + {{"InputLayer", {1, 5, 2, 3, 5, 8, 7, 3, 6, 3, 3, 3, 9, 1, 9, 4, 1, 8, 1, 3, 6, 8, 1, 9, 2}}}, + {{"OutputLayer", {23, 33, 24, 91, 99, 48, 26, 50, 19}}}); +} + } diff --git a/src/armnnOnnxParser/OnnxParser.cpp b/src/armnnOnnxParser/OnnxParser.cpp index dd6a06fd00..4eaf63653b 100644 --- a/src/armnnOnnxParser/OnnxParser.cpp +++ b/src/armnnOnnxParser/OnnxParser.cpp @@ -1754,6 +1754,8 @@ void OnnxParserImpl::ParseConv(const onnx::NodeProto& node) } armnn::IConnectableLayer* layer; + std::vector tensorIndexes= {node.input(0), node.input(1)}; + auto weightTensor = CreateConstTensor(node.input(1)); if (node.input_size() == 3) @@ -1766,7 +1768,9 @@ void OnnxParserImpl::ParseConv(const onnx::NodeProto& node) CHECK_LOCATION().AsString())); } desc.m_BiasEnabled = true; + tensorIndexes.emplace_back(node.input(2)); auto biasTensor = CreateConstTensor(node.input(2)); + ARMNN_NO_DEPRECATE_WARN_BEGIN layer = m_Network->AddConvolution2dLayer(desc, weightTensor.first, Optional(biasTensor.first), @@ -1778,6 +1782,7 @@ void OnnxParserImpl::ParseConv(const onnx::NodeProto& node) weightTensor.first, EmptyOptional(), node.name().c_str()); + ARMNN_NO_DEPRECATE_WARN_END } ARMNN_ASSERT(layer != nullptr); @@ -1788,7 +1793,7 @@ void OnnxParserImpl::ParseConv(const onnx::NodeProto& node) // register the input connection slots for the layer, connections are made after all layers have been created // only the tensors for the inputs are relevant, exclude the const tensors - RegisterInputSlots(layer, {node.input(0)}); + RegisterInputSlots(layer, tensorIndexes); // register the output connection slots for the layer, connections are made after all layers have been created RegisterOutputSlots(layer, {node.output(0)}); diff --git a/src/armnnSerializer/Serializer.cpp b/src/armnnSerializer/Serializer.cpp index 99d1c2bd18..488dac6186 100644 --- a/src/armnnSerializer/Serializer.cpp +++ b/src/armnnSerializer/Serializer.cpp @@ -347,13 +347,10 @@ void SerializerStrategy::SerializeConstantLayer(const armnn::IConnectableLayer* // Build FlatBuffer for Convolution2dLayer void SerializerStrategy::SerializeConvolution2dLayer(const armnn::IConnectableLayer* layer, const armnn::Convolution2dDescriptor& descriptor, - const std::vector& constants, const char* name) { IgnoreUnused(name); - const armnn::ConstTensor weights = constants[0]; - // Create FlatBuffer BaseLayer auto flatBufferBaseLayer = CreateLayerBase(layer, serializer::LayerType::LayerType_Convolution2d); @@ -368,21 +365,11 @@ void SerializerStrategy::SerializeConvolution2dLayer(const armnn::IConnectableLa descriptor.m_DilationY, descriptor.m_BiasEnabled, GetFlatBufferDataLayout(descriptor.m_DataLayout)); - auto flatBufferWeightsConstTensorInfo = CreateConstTensorInfo(weights); - flatbuffers::Offset flatBufferBiasesConstTensorInfo; - - if (constants.size() > 1) - { - const armnn::ConstTensor biases = constants[1]; - flatBufferBiasesConstTensorInfo = CreateConstTensorInfo(biases); - } // Create the FlatBuffer Convolution2dLayer auto flatBufferLayer = CreateConvolution2dLayer(m_flatBufferBuilder, flatBufferBaseLayer, - flatBufferDescriptor, - flatBufferWeightsConstTensorInfo, - flatBufferBiasesConstTensorInfo); + flatBufferDescriptor); // Add the AnyLayer to the FlatBufferLayers CreateAnyLayer(flatBufferLayer.o, serializer::Layer::Layer_Convolution2dLayer); @@ -2048,7 +2035,6 @@ void SerializerStrategy::ExecuteStrategy(const armnn::IConnectableLayer* layer, static_cast(descriptor); SerializeConvolution2dLayer(layer, layerDescriptor, - constants, name); break; } diff --git a/src/armnnSerializer/Serializer.hpp b/src/armnnSerializer/Serializer.hpp index afde778dc2..1a0978f0de 100644 --- a/src/armnnSerializer/Serializer.hpp +++ b/src/armnnSerializer/Serializer.hpp @@ -145,7 +145,6 @@ private: void SerializeConvolution2dLayer(const armnn::IConnectableLayer* layer, const armnn::Convolution2dDescriptor& descriptor, - const std::vector& constants, const char* name = nullptr); void SerializeConvolution3dLayer(const armnn::IConnectableLayer* layer, diff --git a/src/armnnSerializer/test/SerializerTestUtils.cpp b/src/armnnSerializer/test/SerializerTestUtils.cpp index cf2cb15b15..187384777d 100644 --- a/src/armnnSerializer/test/SerializerTestUtils.cpp +++ b/src/armnnSerializer/test/SerializerTestUtils.cpp @@ -51,17 +51,17 @@ void LayerVerifierBase::VerifyNameAndConnections(const armnn::IConnectableLayer* const armnn::TensorInfo& connectedInfo = connectedOutput->GetTensorInfo(); CHECK(connectedInfo.GetShape() == m_InputTensorInfos[i].GetShape()); - CHECK( - GetDataTypeName(connectedInfo.GetDataType()) == GetDataTypeName(m_InputTensorInfos[i].GetDataType())); + CHECK(GetDataTypeName(connectedInfo.GetDataType()) == GetDataTypeName(m_InputTensorInfos[i].GetDataType())); - // If weights and bias are connected to DepthwiseConvolution2d via Constant Layer we do not check. - // Constant Layer already disabled in SerializerTestUtils.hpp from entering function. - if (layer->GetType() == armnn::LayerType::DepthwiseConvolution2d && - connectedOutput->GetOwningIConnectableLayer().GetType() != armnn::LayerType::Constant) + if (connectedInfo.HasMultipleQuantizationScales()) + { + CHECK(connectedInfo.GetQuantizationScales() == m_InputTensorInfos[i].GetQuantizationScales()); + } + else { CHECK(connectedInfo.GetQuantizationScale() == m_InputTensorInfos[i].GetQuantizationScale()); - CHECK(connectedInfo.GetQuantizationOffset() == m_InputTensorInfos[i].GetQuantizationOffset()); } + CHECK(connectedInfo.GetQuantizationOffset() == m_InputTensorInfos[i].GetQuantizationOffset()); } for (unsigned int i = 0; i < m_OutputTensorInfos.size(); i++) diff --git a/src/armnnSerializer/test/SerializerTests.cpp b/src/armnnSerializer/test/SerializerTests.cpp index 278715bfa4..a042939265 100644 --- a/src/armnnSerializer/test/SerializerTests.cpp +++ b/src/armnnSerializer/test/SerializerTests.cpp @@ -333,6 +333,39 @@ TEST_CASE("SerializeConstant") deserializedNetwork->ExecuteStrategy(verifier); } +using Convolution2dDescriptor = armnn::Convolution2dDescriptor; +class Convolution2dLayerVerifier : public LayerVerifierBaseWithDescriptor +{ +public: + Convolution2dLayerVerifier(const std::string& layerName, + const std::vector& inputInfos, + const std::vector& outputInfos, + const Convolution2dDescriptor& descriptor) + : LayerVerifierBaseWithDescriptor(layerName, inputInfos, outputInfos, descriptor) {} + + void ExecuteStrategy(const armnn::IConnectableLayer* layer, + const armnn::BaseDescriptor& descriptor, + const std::vector& constants, + const char* name, + const armnn::LayerBindingId id = 0) override + { + armnn::IgnoreUnused(constants, id); + switch (layer->GetType()) + { + case armnn::LayerType::Input: break; + case armnn::LayerType::Output: break; + case armnn::LayerType::Constant: break; + default: + { + VerifyNameAndConnections(layer, name); + const Convolution2dDescriptor& layerDescriptor = + static_cast(descriptor); + CHECK(layerDescriptor.m_BiasEnabled == m_Descriptor.m_BiasEnabled); + } + } + } +}; + TEST_CASE("SerializeConvolution2d") { const std::string layerName("convolution2d"); @@ -362,11 +395,13 @@ TEST_CASE("SerializeConvolution2d") armnn::INetworkPtr network = armnn::INetwork::Create(); armnn::IConnectableLayer* const inputLayer = network->AddInputLayer(0); + ARMNN_NO_DEPRECATE_WARN_BEGIN armnn::IConnectableLayer* const convLayer = network->AddConvolution2dLayer(descriptor, weights, armnn::Optional(biases), layerName.c_str()); + ARMNN_NO_DEPRECATE_WARN_END armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0); inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0)); @@ -378,9 +413,7 @@ TEST_CASE("SerializeConvolution2d") armnn::INetworkPtr deserializedNetwork = DeserializeNetwork(SerializeNetwork(*network)); CHECK(deserializedNetwork); - const std::vector& constants {weights, biases}; - LayerVerifierBaseWithDescriptorAndConstants verifier( - layerName, {inputInfo}, {outputInfo}, descriptor, constants); + Convolution2dLayerVerifier verifier(layerName, {inputInfo, weightsInfo, biasesInfo}, {outputInfo}, descriptor); deserializedNetwork->ExecuteStrategy(verifier); } @@ -417,25 +450,134 @@ TEST_CASE("SerializeConvolution2dWithPerAxisParams") armnn::INetworkPtr network = armnn::INetwork::Create(); armnn::IConnectableLayer* const inputLayer = network->AddInputLayer(0); + ARMNN_NO_DEPRECATE_WARN_BEGIN armnn::IConnectableLayer* const convLayer = network->AddConvolution2dLayer(descriptor, weights, armnn::Optional(biases), layerName.c_str()); + ARMNN_NO_DEPRECATE_WARN_END + armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0); + + inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0)); + convLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo); + convLayer->GetOutputSlot(0).SetTensorInfo(outputInfo); + + armnn::INetworkPtr deserializedNetwork = DeserializeNetwork(SerializeNetwork(*network)); + CHECK(deserializedNetwork); + + Convolution2dLayerVerifier verifier(layerName, {inputInfo, kernelInfo, biasInfo}, {outputInfo}, descriptor); + + deserializedNetwork->ExecuteStrategy(verifier); +} + +TEST_CASE("SerializeConvolution2dWeightsAndBiasesAsConstantLayers") +{ + const std::string layerName("convolution2d"); + const armnn::TensorInfo inputInfo ({ 1, 5, 5, 1 }, armnn::DataType::Float32); + const armnn::TensorInfo outputInfo({ 1, 3, 3, 1 }, armnn::DataType::Float32); + + const armnn::TensorInfo weightsInfo({ 1, 3, 3, 1 }, armnn::DataType::Float32, 0.0f, 0, true); + const armnn::TensorInfo biasesInfo ({ 1 }, armnn::DataType::Float32, 0.0f, 0, true); + + std::vector weightsData = GenerateRandomData(weightsInfo.GetNumElements()); + armnn::ConstTensor weights(weightsInfo, weightsData); + + std::vector biasesData = GenerateRandomData(biasesInfo.GetNumElements()); + armnn::ConstTensor biases(biasesInfo, biasesData); + + armnn::Convolution2dDescriptor descriptor; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_StrideX = 2; + descriptor.m_StrideY = 2; + descriptor.m_DilationX = 2; + descriptor.m_DilationY = 2; + descriptor.m_BiasEnabled = true; + descriptor.m_DataLayout = armnn::DataLayout::NHWC; + + armnn::INetworkPtr network = armnn::INetwork::Create(); + armnn::IConnectableLayer* const inputLayer = network->AddInputLayer(0); + armnn::IConnectableLayer* const weightsLayer = network->AddConstantLayer(weights, "Weights"); + armnn::IConnectableLayer* const biasesLayer = network->AddConstantLayer(biases, "Biases"); + armnn::IConnectableLayer* const convLayer = network->AddConvolution2dLayer(descriptor, + layerName.c_str()); + armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0); + + inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0)); + weightsLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(1)); + biasesLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(2)); + convLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo); + weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo); + biasesLayer->GetOutputSlot(0).SetTensorInfo(biasesInfo); + convLayer->GetOutputSlot(0).SetTensorInfo(outputInfo); + + armnn::INetworkPtr deserializedNetwork = DeserializeNetwork(SerializeNetwork(*network)); + CHECK(deserializedNetwork); + + Convolution2dLayerVerifier verifier(layerName, {inputInfo, weightsInfo, biasesInfo}, {outputInfo}, descriptor); + + deserializedNetwork->ExecuteStrategy(verifier); +} + +TEST_CASE("SerializeConvolution2dWeightsAndBiasesAsConstantLayers") +{ + const std::string layerName("convolution2d"); + const armnn::TensorInfo inputInfo ({ 1, 5, 5, 1 }, armnn::DataType::Float32); + const armnn::TensorInfo outputInfo({ 1, 3, 3, 1 }, armnn::DataType::Float32); + + const armnn::TensorInfo weightsInfo({ 1, 3, 3, 1 }, armnn::DataType::Float32, 0.0f, 0, true); + const armnn::TensorInfo biasesInfo ({ 1 }, armnn::DataType::Float32, 0.0f, 0, true); + + std::vector weightsData = GenerateRandomData(weightsInfo.GetNumElements()); + armnn::ConstTensor weights(weightsInfo, weightsData); + + std::vector biasesData = GenerateRandomData(biasesInfo.GetNumElements()); + armnn::ConstTensor biases(biasesInfo, biasesData); + + armnn::Convolution2dDescriptor descriptor; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_StrideX = 2; + descriptor.m_StrideY = 2; + descriptor.m_DilationX = 2; + descriptor.m_DilationY = 2; + descriptor.m_BiasEnabled = true; + descriptor.m_DataLayout = armnn::DataLayout::NHWC; + + armnn::INetworkPtr network = armnn::INetwork::Create(); + armnn::IConnectableLayer* const inputLayer = network->AddInputLayer(0); + armnn::IConnectableLayer* const weightsLayer = network->AddConstantLayer(weights, "Weights"); + armnn::IConnectableLayer* const biasesLayer = network->AddConstantLayer(biases, "Biases"); + armnn::IConnectableLayer* const convLayer = network->AddConvolution2dLayer(descriptor, + layerName.c_str()); armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0); inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0)); + weightsLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(1)); + biasesLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(2)); convLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo); + weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo); + biasesLayer->GetOutputSlot(0).SetTensorInfo(biasesInfo); convLayer->GetOutputSlot(0).SetTensorInfo(outputInfo); armnn::INetworkPtr deserializedNetwork = DeserializeNetwork(SerializeNetwork(*network)); CHECK(deserializedNetwork); const std::vector& constants {weights, biases}; - LayerVerifierBaseWithDescriptorAndConstants verifier( - layerName, {inputInfo}, {outputInfo}, descriptor, constants); + LayerVerifierBaseWithDescriptorAndConstants verifier( + layerName, {inputInfo, weightsInfo, biasesInfo}, {outputInfo}, descriptor, constants); + deserializedNetwork->ExecuteStrategy(verifier); } diff --git a/src/armnnTestUtils/CommonTestUtils.cpp b/src/armnnTestUtils/CommonTestUtils.cpp index c85330577d..472716c97c 100644 --- a/src/armnnTestUtils/CommonTestUtils.cpp +++ b/src/armnnTestUtils/CommonTestUtils.cpp @@ -9,15 +9,42 @@ using namespace armnn; -SubgraphView::InputSlots CreateInputsFrom(const std::vector& layers) +SubgraphView::InputSlots CreateInputsFrom(Layer* layer, + std::vector ignoreSlots) { SubgraphView::InputSlots result; - for (auto&& layer : layers) + for (auto&& it = layer->BeginInputSlots(); it != layer->EndInputSlots(); ++it) { - for (auto&& it = layer->BeginInputSlots(); it != layer->EndInputSlots(); ++it) + if (std::find(ignoreSlots.begin(), ignoreSlots.end(), it->GetSlotIndex()) != ignoreSlots.end()) + { + continue; + } + else { result.push_back(&(*it)); } + } + return result; +} + +// ignoreSlots assumes you want to ignore the same slots all on layers within the vector +SubgraphView::InputSlots CreateInputsFrom(const std::vector& layers, + std::vector ignoreSlots) +{ + SubgraphView::InputSlots result; + for (auto&& layer: layers) + { + for (auto&& it = layer->BeginInputSlots(); it != layer->EndInputSlots(); ++it) + { + if (std::find(ignoreSlots.begin(), ignoreSlots.end(), it->GetSlotIndex()) != ignoreSlots.end()) + { + continue; + } + else + { + result.push_back(&(*it)); + } + } } return result; } diff --git a/src/armnnTestUtils/CommonTestUtils.hpp b/src/armnnTestUtils/CommonTestUtils.hpp index b75a32be61..5b4b356247 100644 --- a/src/armnnTestUtils/CommonTestUtils.hpp +++ b/src/armnnTestUtils/CommonTestUtils.hpp @@ -79,8 +79,11 @@ void SetWeightAndBias(ConvolutionLayer* layer, const armnn::TensorInfo& weightIn layer->m_Weight->Allocate(); layer->m_Bias->Allocate(); } +armnn::SubgraphView::InputSlots CreateInputsFrom(armnn::Layer* layer, + std::vector ignoreSlots = {}); -armnn::SubgraphView::InputSlots CreateInputsFrom(const std::vector& layers); +armnn::SubgraphView::InputSlots CreateInputsFrom(const std::vector& layers, + std::vector ignoreSlots = {}); armnn::SubgraphView::OutputSlots CreateOutputsFrom(const std::vector& layers); diff --git a/src/armnnTestUtils/CreateWorkload.hpp b/src/armnnTestUtils/CreateWorkload.hpp index 2590ae89b2..7700a5573a 100644 --- a/src/armnnTestUtils/CreateWorkload.hpp +++ b/src/armnnTestUtils/CreateWorkload.hpp @@ -483,28 +483,37 @@ std::unique_ptr CreateConvolution2dWorkloadTest(armnn::IW layerDesc.m_PadBottom = 1; layerDesc.m_StrideX = 2; layerDesc.m_StrideY = 4; - layerDesc.m_BiasEnabled = true; + layerDesc.m_BiasEnabled = false; layerDesc.m_DataLayout = dataLayout; + float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0; + float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0; + Convolution2dLayer* const layer = graph.AddLayer(layerDesc, "layer"); TensorShape weightShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 5, 3} : TensorShape{2, 5, 3, 3}; TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3}; TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2}; + // As optimization isn't run member variables need to be updated. layer->m_Weight = std::make_unique(TensorInfo(weightShape, DataType)); - layer->m_Bias = std::make_unique(TensorInfo({2}, GetBiasDataType(DataType))); - layer->m_Weight->Allocate(); - layer->m_Bias->Allocate(); + + armnn::TensorInfo weightsTensorInfo(weightShape, DataType, inputsQScale); + weightsTensorInfo.SetConstant(); // Creates extra layers. Layer* const input = graph.AddLayer(0, "input"); + auto const weights = graph.AddLayer("weights"); Layer* const output = graph.AddLayer(0, "output"); + weights->m_LayerOutput = std::make_unique(weightsTensorInfo); + weights->m_LayerOutput->Allocate(); + // Connects up. - Connect(input, layer, TensorInfo(inputShape, DataType)); - Connect(layer, output, TensorInfo(outputShape, DataType)); + Connect(input, layer, TensorInfo(inputShape, DataType, inputsQScale)); + Connect(weights, layer, weightsTensorInfo, 0, 1); + Connect(layer, output, TensorInfo(outputShape, DataType, outputQScale)); CreateTensorHandles(graph, factory); // Makes the workload and checks it. @@ -517,14 +526,11 @@ std::unique_ptr CreateConvolution2dWorkloadTest(armnn::IW CHECK(queueDescriptor.m_Parameters.m_PadRight == 3); CHECK(queueDescriptor.m_Parameters.m_PadTop == 1); CHECK(queueDescriptor.m_Parameters.m_PadBottom == 1); - CHECK(queueDescriptor.m_Parameters.m_BiasEnabled); + CHECK(!queueDescriptor.m_Parameters.m_BiasEnabled); CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout)); - CHECK(queueDescriptor.m_Inputs.size() == 1); + CHECK(queueDescriptor.m_Inputs.size() == 2); CHECK(queueDescriptor.m_Outputs.size() == 1); - CHECK((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo(weightShape, DataType))); - CHECK((queueDescriptor.m_Bias->GetTensorInfo() == - TensorInfo({2}, GetBiasDataType(DataType)))); // Returns so we can do extra, backend-specific tests. return workload; @@ -548,19 +554,26 @@ std::unique_ptr CreateConvolution2dFusedActivationWithBlo layerDesc.m_BiasEnabled = true; layerDesc.m_DataLayout = dataLayout; + float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0; + float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0; Convolution2dLayer* const layer = graph.AddLayer(layerDesc, "layer"); TensorShape weightShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 5, 3} : TensorShape{2, 5, 3, 3}; TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3}; TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2}; - + // As optimization isn't run member variables need to be updated. layer->m_Weight = std::make_unique(TensorInfo(weightShape, DataType)); layer->m_Bias = std::make_unique(TensorInfo({2}, GetBiasDataType(DataType))); layer->m_Weight->Allocate(); layer->m_Bias->Allocate(); + armnn::TensorInfo weightsTensorInfo(weightShape, DataType, inputsQScale); + weightsTensorInfo.SetConstant(); + armnn::TensorInfo biasTensorInfo({2}, DataType, inputsQScale); + biasTensorInfo.SetConstant(); + auto activationDesc = std::make_shared(); activationDesc->m_A = 10.0f; activationDesc->m_B = 5.0f; @@ -579,11 +592,20 @@ std::unique_ptr CreateConvolution2dFusedActivationWithBlo // Creates extra layers. Layer* const input = graph.AddLayer(0, "input"); + auto const weights = graph.AddLayer("weights"); + auto const bias = graph.AddLayer("bias"); Layer* const output = graph.AddLayer(0, "output"); + weights->m_LayerOutput = std::make_unique(weightsTensorInfo); + weights->m_LayerOutput->Allocate(); + bias->m_LayerOutput = std::make_unique(biasTensorInfo); + bias->m_LayerOutput->Allocate(); + // Connects up. - Connect(input, layer, TensorInfo(inputShape, DataType)); - Connect(layer, output, TensorInfo(outputShape, DataType)); + Connect(input, layer, TensorInfo(inputShape, DataType, inputsQScale)); + Connect(weights, layer, weightsTensorInfo, 0, 1); + Connect(bias, layer, biasTensorInfo, 0, 2); + Connect(layer, output, TensorInfo(outputShape, DataType, outputQScale)); CreateTensorHandles(graph, factory); // Makes the workload and checks it. @@ -606,11 +628,9 @@ std::unique_ptr CreateConvolution2dFusedActivationWithBlo CHECK(queueDescriptor.m_Parameters.m_PadBottom == 1); CHECK(queueDescriptor.m_Parameters.m_BiasEnabled); CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout)); + CHECK(queueDescriptor.m_Outputs.size() == 1); - CHECK((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo(weightShape, DataType))); - CHECK((queueDescriptor.m_Bias->GetTensorInfo() == - TensorInfo({2}, GetBiasDataType(DataType)))); - CHECK(queueDescriptor.m_Inputs.size() == 1); + CHECK(queueDescriptor.m_Inputs.size() == 3); // Returns so we can do extra, backend-specific tests. return workload; @@ -630,28 +650,41 @@ std::unique_ptr CreateConvolution2dWorkloadFastMathTest(a layerDesc.m_PadBottom = 0; layerDesc.m_StrideX = 1; layerDesc.m_StrideY = 1; - layerDesc.m_BiasEnabled = false; + layerDesc.m_BiasEnabled = true; layerDesc.m_DataLayout = dataLayout; - Convolution2dLayer* const layer = graph.AddLayer(layerDesc, "layer"); + float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0; + float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0; - TensorShape weightShape = TensorShape{32, 32, 3, 3}; - TensorShape inputShape = TensorShape{1, 32, 149, 149}; - TensorShape outputShape = TensorShape{1, 32, 147, 147}; + Convolution2dLayer* const layer = graph.AddLayer(layerDesc, "layer"); + TensorShape weightShape = TensorShape{ 32, 32, 3, 3 }; + TensorShape biasShape = TensorShape{ 32 }; + TensorShape inputShape = TensorShape{ 1, 32, 149, 149 }; + TensorShape outputShape = TensorShape{ 1, 32, 147, 147 }; + // As optimization isn't run member variables need to be updated. layer->m_Weight = std::make_unique(TensorInfo(weightShape, DataType)); - layer->m_Bias = std::make_unique(TensorInfo({2}, GetBiasDataType(DataType))); + layer->m_Bias = std::make_unique(TensorInfo(biasShape, GetBiasDataType(DataType))); layer->m_Weight->Allocate(); layer->m_Bias->Allocate(); + armnn::TensorInfo weightsTensorInfo(weightShape, DataType, inputsQScale); + weightsTensorInfo.SetConstant(); + armnn::TensorInfo biasTensorInfo(biasShape, DataType, inputsQScale); + biasTensorInfo.SetConstant(); + // Creates extra layers. Layer* const input = graph.AddLayer(0, "input"); + auto const weights = graph.AddLayer("weights"); + auto const bias = graph.AddLayer("bias"); Layer* const output = graph.AddLayer(0, "output"); // Connects up. Connect(input, layer, TensorInfo(inputShape, DataType)); - Connect(layer, output, TensorInfo(outputShape, DataType)); + Connect(weights, layer, weightsTensorInfo, 0, 1); + Connect(bias, layer, biasTensorInfo, 0, 2); + Connect(layer, output, TensorInfo(outputShape, DataType, outputQScale)); CreateTensorHandles(graph, factory); // Makes the workload and checks it. @@ -666,9 +699,8 @@ std::unique_ptr CreateConvolution2dWorkloadFastMathTest(a CHECK(queueDescriptor.m_Parameters.m_PadBottom == 0); CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout)); - CHECK(queueDescriptor.m_Inputs.size() == 1); + CHECK(queueDescriptor.m_Inputs.size() == 3); CHECK(queueDescriptor.m_Outputs.size() == 1); - CHECK((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo(weightShape, DataType))); // Returns so we can do extra, backend-specific tests. return workload; @@ -1074,9 +1106,9 @@ std::unique_ptr CreateQLstmWorkloadTest(armnn::IWorkloadFactory& return workload; } -template +template std::unique_ptr CreateDirectConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory, - armnn::Graph& graph) + armnn::Graph& graph) { // Creates the layer we're testing. Convolution2dDescriptor layerDesc; @@ -1093,18 +1125,34 @@ std::unique_ptr CreateDirectConvolution2dWorkloadTest(arm float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0; float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0; - layer->m_Weight = std::make_unique(TensorInfo({ 2, 3, 3, 3 }, DataType, inputsQScale)); - layer->m_Bias = std::make_unique - (TensorInfo({2}, GetBiasDataType(DataType), inputsQScale)); + TensorShape biasShape = TensorShape{ 2 }; + TensorShape weightShape = TensorShape{ 2, 3, 3, 3 }; + armnn::TensorInfo weightsTensorInfo(weightShape, DataType, inputsQScale); + weightsTensorInfo.SetConstant(); + armnn::TensorInfo biasTensorInfo(biasShape, GetBiasDataType(DataType), inputsQScale); + biasTensorInfo.SetConstant(); + + layer->m_Weight = std::make_unique(weightsTensorInfo); + layer->m_Bias = std::make_unique(biasTensorInfo); + layer->m_Weight->Allocate(); layer->m_Bias->Allocate(); // Creates extra layers. Layer* const input = graph.AddLayer(0, "input"); + auto const weights = graph.AddLayer("weights"); + auto const bias = graph.AddLayer("bias"); Layer* const output = graph.AddLayer(0, "output"); + weights->m_LayerOutput = std::make_unique(weightsTensorInfo); + weights->m_LayerOutput->Allocate(); + bias->m_LayerOutput = std::make_unique(biasTensorInfo); + bias->m_LayerOutput->Allocate(); + // Connects up. Connect(input, layer, TensorInfo({2, 3, 6, 6}, DataType, inputsQScale)); + Connect(weights, layer, weightsTensorInfo, 0, 1); + Connect(bias, layer, biasTensorInfo, 0, 2); Connect(layer, output, TensorInfo({2, 2, 6, 6}, DataType, outputQScale)); CreateTensorHandles(graph, factory); @@ -1120,12 +1168,10 @@ std::unique_ptr CreateDirectConvolution2dWorkloadTest(arm CHECK(queueDescriptor.m_Parameters.m_PadBottom == 1); CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == true); - CHECK(queueDescriptor.m_Inputs.size() == 1); + CHECK(queueDescriptor.m_Inputs.size() == 3); CHECK(queueDescriptor.m_Outputs.size() == 1); - CHECK((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({2, 3, 3, 3}, - DataType, inputsQScale))); - CHECK((queueDescriptor.m_Bias->GetTensorInfo() - == TensorInfo({2}, GetBiasDataType(DataType), inputsQScale))); + CHECK((queueDescriptor.m_Weight->GetTensorInfo() == weightsTensorInfo)); + CHECK((queueDescriptor.m_Bias->GetTensorInfo() == biasTensorInfo)); // Returns so we can do extra, backend-specific tests. return workload; @@ -2094,18 +2140,22 @@ std::pair> Cre armnn::ConstTensor biases(biasTensorInfo, biasData); // Create convolution layer with biases + ARMNN_NO_DEPRECATE_WARN_BEGIN convLayer = net->AddConvolution2dLayer(convDesc2d, weights, Optional(biases), convLayerName.c_str()); + ARMNN_NO_DEPRECATE_WARN_END } else { // Create convolution layer without biases + ARMNN_NO_DEPRECATE_WARN_BEGIN convLayer = net->AddConvolution2dLayer(convDesc2d, weights, EmptyOptional(), convLayerName.c_str()); + ARMNN_NO_DEPRECATE_WARN_END } CHECK(convLayer); diff --git a/src/armnnTestUtils/MockBackend.cpp b/src/armnnTestUtils/MockBackend.cpp index ac7f7c7fef..5dfe9a3b8b 100644 --- a/src/armnnTestUtils/MockBackend.cpp +++ b/src/armnnTestUtils/MockBackend.cpp @@ -66,6 +66,7 @@ bool IsLayerSupported(const armnn::Layer* layer) { case armnn::LayerType::Input: case armnn::LayerType::Output: + case armnn::LayerType::Constant: case armnn::LayerType::Addition: case armnn::LayerType::Convolution2d: // Layer supported diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp index 7cb9f6a7bc..aa07f7b3f9 100644 --- a/src/armnnTfLiteParser/TfLiteParser.cpp +++ b/src/armnnTfLiteParser/TfLiteParser.cpp @@ -417,6 +417,9 @@ armnn::TensorInfo ToTensorInfo(TfLiteParserImpl::TensorRawPtr tensorPtr, case tflite::TensorType_FLOAT32: type = armnn::DataType::Float32; break; + case tflite::TensorType_FLOAT16: + type = armnn::DataType::Float16; + break; case tflite::TensorType_INT8: if (tensorPtr->quantization->zero_point.size() == 1) { @@ -1067,58 +1070,64 @@ void TfLiteParserImpl::ParseConv2D(size_t subgraphIndex, size_t operatorIndex) CHECK_SUPPORTED_FUSED_ACTIVATION(options, subgraphIndex, operatorIndex); + auto inputs = GetInputs(m_Model, subgraphIndex, operatorIndex); + auto outputs = GetOutputs(m_Model, subgraphIndex, operatorIndex); + CHECK_VALID_SIZE(outputs.size(), 1); + Convolution2dDescriptor desc; - desc.m_BiasEnabled = false; + inputs.size() == 3 ? + desc.m_BiasEnabled = true : desc.m_BiasEnabled = false; desc.m_StrideX = CHECKED_NON_NEGATIVE(options->stride_w); desc.m_StrideY = CHECKED_NON_NEGATIVE(options->stride_h); desc.m_DataLayout = armnn::DataLayout::NHWC; desc.m_DilationX = CHECKED_NON_NEGATIVE(options->dilation_w_factor); desc.m_DilationY = CHECKED_NON_NEGATIVE(options->dilation_h_factor); - auto inputs = GetInputs(m_Model, subgraphIndex, operatorIndex); - CHECK_VALID_SIZE(inputs.size(), 2, 3); - - auto outputs = GetOutputs(m_Model, subgraphIndex, operatorIndex); - CHECK_VALID_SIZE(outputs.size(), 1); - - armnn::TensorInfo inputTensorInfo = ToTensorInfo(inputs[0]); + armnn::TensorInfo inputTensorInfo = ToTensorInfo(inputs[0]); armnn::TensorInfo filterTensorInfo = ToTensorInfo(inputs[1]); // assuming input is NHWC unsigned int inputHeight = inputTensorInfo.GetShape()[1]; - unsigned int inputWidth = inputTensorInfo.GetShape()[2]; + unsigned int inputWidth = inputTensorInfo.GetShape()[2]; // assuming the filter is OHWI : Output, H, W, Input // which is essentially the same as NHWC unsigned int filterHeight = filterTensorInfo.GetShape()[1]; - unsigned int filterWidth = filterTensorInfo.GetShape()[2]; + unsigned int filterWidth = filterTensorInfo.GetShape()[2]; CalcPadding(inputHeight, filterHeight, desc.m_StrideY, desc.m_DilationY, desc.m_PadTop, desc.m_PadBottom, options->padding); CalcPadding(inputWidth, filterWidth, desc.m_StrideX, desc.m_DilationX, desc.m_PadLeft, desc.m_PadRight, options->padding); - auto filterTensorAndData = CreateConstTensorNonPermuted(inputs[1], filterTensorInfo, inputTensorInfo.GetDataType()); - armnn::IConnectableLayer* layer = nullptr; + // Add the first input and weights tensor to the registration list. + // The constant weights will be added by SetupConstantLayers. + auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex)); + std::vector tensorIndexesToRegister = { inputTensorIndexes[0], inputTensorIndexes[1] }; auto layerName = fmt::format("Conv2D:{}:{}", subgraphIndex, operatorIndex); + armnn::IConnectableLayer* layer = m_Network->AddConvolution2dLayer(desc, layerName.c_str()); - if (inputs.size() == 3) + if (IsConstTensor(inputs[1]) && inputTensorInfo.GetDataType() == DataType::Float32 && + (filterTensorInfo.GetDataType() == DataType::QAsymmU8 || + filterTensorInfo.GetDataType() == DataType::QAsymmS8)) { - desc.m_BiasEnabled = true; - armnn::TensorInfo biasTensorInfo = ToTensorInfo(inputs[2]); - auto biasTensorAndData = CreateConstTensorNonPermuted(inputs[2], biasTensorInfo, inputTensorInfo.GetDataType()); - layer = m_Network->AddConvolution2dLayer(desc, - filterTensorAndData.first, - Optional(biasTensorAndData.first), - layerName.c_str()); + m_ConstantsToDequantize.emplace_back(inputs[1]->buffer); } - else + + if (desc.m_BiasEnabled) { - layer = m_Network->AddConvolution2dLayer(desc, - filterTensorAndData.first, - EmptyOptional(), - layerName.c_str()); + armnn::TensorInfo biasTensorInfo = ToTensorInfo(inputs[2]); + + // Add the biases input to the registration list, a constant layer will be added by SetupConstantLayers. + tensorIndexesToRegister.emplace_back(inputTensorIndexes[2]); + + if (IsConstTensor(inputs[2]) && inputTensorInfo.GetDataType() == DataType::Float32 && + (filterTensorInfo.GetDataType() == DataType::QAsymmU8 || + filterTensorInfo.GetDataType() == DataType::QAsymmS8)) + { + m_ConstantsToDequantize.emplace_back(inputs[2]->buffer); + } } ARMNN_ASSERT(layer != nullptr); @@ -1128,13 +1137,12 @@ void TfLiteParserImpl::ParseConv2D(size_t subgraphIndex, size_t operatorIndex) // register the input connection slots for the layer, connections are made after all layers have been created // only the tensors for the inputs are relevant, exclude the const tensors - auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex)); - RegisterInputSlots(subgraphIndex, operatorIndex, layer, {inputTensorIndexes[0]}); + RegisterInputSlots(subgraphIndex, operatorIndex, layer, tensorIndexesToRegister); layer = AddFusedActivationLayer(layer, 0, options->fused_activation_function); // register the output connection slots for the layer, connections are made after all layers have been created auto outputTensorIndexes = AsUnsignedVector(GetOutputTensorIds(m_Model, subgraphIndex, operatorIndex)); - RegisterOutputSlots(subgraphIndex, operatorIndex, layer, {outputTensorIndexes[0]}); + RegisterOutputSlots(subgraphIndex, operatorIndex, layer, { outputTensorIndexes[0] }); } // Conv3D support was added in TF 2.5, so for backwards compatibility a hash define is needed. @@ -1261,7 +1269,6 @@ void TfLiteParserImpl::ParseDepthwiseConv2D(size_t subgraphIndex, size_t operato desc.m_DilationX, desc.m_PadLeft, desc.m_PadRight, options->padding); // ArmNN uses the same filter tensor layout at TfLite [1, H, W, O] no need for any permutation - auto filterTensor = CreateConstTensorNonPermuted(inputs[1], filterTensorInfo); auto layerName = fmt::format("DepthwiseConv2D:{}:{}", subgraphIndex, operatorIndex); auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex)); @@ -1275,7 +1282,6 @@ void TfLiteParserImpl::ParseDepthwiseConv2D(size_t subgraphIndex, size_t operato { desc.m_BiasEnabled = true; TensorInfo biasTensorInfo = ToTensorInfo(inputs[2]); - auto biasTensorAndData = CreateConstTensorNonPermuted(inputs[2], biasTensorInfo); // Add the biases input to the registration list, a constant layer will be added by SetupConstantLayers. tensorIndexesToRegister.emplace_back(inputTensorIndexes[2]); diff --git a/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp b/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp index de3a34ee08..a26442cb86 100644 --- a/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp +++ b/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp @@ -225,28 +225,14 @@ LayerType* FuseConvolution2dLayer(OptimizationViews& optimizationViews, ActivationDescriptor& activationDesc, std::string name) { - std::shared_ptr weightHandle = baseLayer->m_Weight; - TensorInfo weightInfo = weightHandle->GetTensorInfo(); + IConnectableLayer* replacement = optimizationViews.GetINetwork() + ->AddConvolution2dLayer(baseLayer->GetParameters(), name.c_str()); - std::shared_ptr biasHandle = baseLayer->m_Bias; - ConstTensor biasTensor; - if (!biasHandle) - { - biasTensor = ConstTensor(); - } - else - { - biasTensor = ConstTensor(biasHandle->GetTensorInfo(), biasHandle->Map(true)); - } - - IConnectableLayer* replacement = - optimizationViews.GetINetwork()-> - AddConvolution2dLayer(baseLayer->GetParameters(), - ConstTensor(weightInfo, weightHandle->Map(true)), - Optional(biasTensor), - name.c_str()); LayerType* replacementLayer = PolymorphicDowncast(replacement); + replacementLayer->m_Weight = std::move(baseLayer->m_Weight); + replacementLayer->m_Bias = std::move(baseLayer->m_Bias); + FuseLayer(optimizationViews, baseLayer, replacementLayer, @@ -263,8 +249,9 @@ LayerType* FuseDepthwiseConvolution2dLayer(OptimizationViews& optimizationViews, ActivationDescriptor& activationDesc, std::string name) { - IConnectableLayer* replacement = optimizationViews.GetINetwork()-> - AddDepthwiseConvolution2dLayer(baseLayer->GetParameters(), name.c_str()); + IConnectableLayer* replacement = + optimizationViews.GetINetwork()->AddDepthwiseConvolution2dLayer(baseLayer->GetParameters(), name.c_str()); + LayerType* replacementLayer = PolymorphicDowncast(replacement); replacementLayer->m_Weight = std::move(baseLayer->m_Weight); diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp index 289f780fba..37fda3e210 100644 --- a/src/backends/backendsCommon/WorkloadData.cpp +++ b/src/backends/backendsCommon/WorkloadData.cpp @@ -1246,7 +1246,13 @@ void Convolution2dQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) co { const std::string descriptorName{"Convolution2dQueueDescriptor"}; - ValidateNumInputs(workloadInfo, descriptorName, 1); + uint32_t numInputs = 2; + if (m_Parameters.m_BiasEnabled) + { + numInputs = 3; + } + + ValidateNumInputs(workloadInfo, descriptorName, numInputs); ValidateNumOutputs(workloadInfo, descriptorName, 1); const TensorInfo& inputTensorInfo = workloadInfo.m_InputTensorInfos[0]; @@ -1255,9 +1261,8 @@ void Convolution2dQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) co ValidateTensorNumDimensions(inputTensorInfo, descriptorName, 4, "input"); ValidateTensorNumDimensions(outputTensorInfo, descriptorName, 4, "output"); - ValidatePointer(m_Weight, descriptorName, "weight"); + const TensorInfo& weightTensorInfo = workloadInfo.m_InputTensorInfos[1]; - const TensorInfo& weightTensorInfo = m_Weight->GetTensorInfo(); ValidateTensorNumDimensions(weightTensorInfo, descriptorName, 4, "weight"); ValidateWeightDataType(inputTensorInfo, weightTensorInfo, descriptorName); @@ -1265,9 +1270,7 @@ void Convolution2dQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) co Optional optionalBiasTensorInfo; if (m_Parameters.m_BiasEnabled) { - ValidatePointer(m_Bias, descriptorName, "bias"); - - optionalBiasTensorInfo = MakeOptional(m_Bias->GetTensorInfo()); + optionalBiasTensorInfo = MakeOptional(workloadInfo.m_InputTensorInfos[2]); const TensorInfo& biasTensorInfo = optionalBiasTensorInfo.value(); ValidateTensorDataType(biasTensorInfo, GetBiasDataType(inputTensorInfo.GetDataType()), descriptorName, "bias"); diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp index f624ee6021..3660e6e721 100644 --- a/src/backends/backendsCommon/WorkloadFactory.cpp +++ b/src/backends/backendsCommon/WorkloadFactory.cpp @@ -246,7 +246,10 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId, const TensorInfo input = OverrideDataType(layer.GetInputSlot(0).GetConnection()->GetTensorInfo(), dataType); const TensorInfo output = OverrideDataType(layer.GetOutputSlot(0).GetTensorInfo(), dataType); - ARMNN_ASSERT(cLayer->m_Weight.get() != nullptr); + ARMNN_ASSERT_MSG(layer.GetInputSlot(1).GetConnection(), + "Convolution2dLayer: Weights should be connected as a Constant Layer."); + const TensorInfo weights = OverrideDataType(layer.GetInputSlot(1).GetConnection()->GetTensorInfo(), + dataType); const Convolution2dDescriptor& descriptor = cLayer->GetParameters(); @@ -254,14 +257,17 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId, Optional biases; if (descriptor.m_BiasEnabled) { - biases = OverrideDataType(cLayer->m_Bias->GetTensorInfo(), GetBiasTypeFromWeightsType(dataType)); + ARMNN_ASSERT_MSG(layer.GetInputSlot(2).GetConnection(), + "Convolution2dLayer: Bias should be connected as a Constant Layer."); + biases = OverrideDataType(layer.GetInputSlot(2).GetConnection()->GetTensorInfo(), + GetBiasTypeFromWeightsType(dataType)); } result = layerSupportObject.IsConvolution2dSupported( input, output, descriptor, - OverrideDataType(cLayer->m_Weight->GetTensorInfo(), dataType), + weights, biases, reason); break; diff --git a/src/backends/backendsCommon/test/DynamicBackendTests.hpp b/src/backends/backendsCommon/test/DynamicBackendTests.hpp index f53bd83100..0d98804954 100644 --- a/src/backends/backendsCommon/test/DynamicBackendTests.hpp +++ b/src/backends/backendsCommon/test/DynamicBackendTests.hpp @@ -1465,7 +1465,7 @@ void CreateReferenceDynamicBackendTestImpl() Convolution2dQueueDescriptor convolution2dQueueDescriptor; WorkloadInfo workloadInfo { - { inputInfo }, + { inputInfo, weightInfo }, { outputInfo } }; convolution2dQueueDescriptor.m_Inputs.push_back(nullptr); diff --git a/src/backends/backendsCommon/test/FullyConnectedEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/FullyConnectedEndToEndTestImpl.hpp index 1076aa6669..0d2d2cb2de 100644 --- a/src/backends/backendsCommon/test/FullyConnectedEndToEndTestImpl.hpp +++ b/src/backends/backendsCommon/test/FullyConnectedEndToEndTestImpl.hpp @@ -110,8 +110,11 @@ armnn::INetworkPtr CreateFullyConnectedNetworkNoConnectedWeightsExplicit(const a { armnn::INetworkPtr network(armnn::INetwork::Create()); + + ConstTensor biases; + armnn::IConnectableLayer* inputLayer = network->AddInputLayer(0, "Input"); - armnn::IConnectableLayer* biasLayer = network->AddInputLayer(2, "Bias_Input"); + armnn::IConnectableLayer* biasLayer = network->AddConstantLayer(biases, "Bias_Input"); armnn::IConnectableLayer* fullyConnectedLayer = network->AddFullyConnectedLayer(descriptor, "Fully_Connected"); armnn::IConnectableLayer* outputLayer = network->AddOutputLayer(0, "Output"); @@ -402,16 +405,7 @@ void FullyConnectedErrorChecking(const std::vector& backends, IRuntime::CreationOptions options; IRuntimePtr runtime(IRuntime::Create(options)); - try - { - Optimize(*network, backends, runtime->GetDeviceSpec()); - FAIL("LayerValidationException should have been thrown"); - } - catch (const LayerValidationException& exc) - { - CHECK(strcmp(exc.what(), "Fully_Connected layer weights not set: Input slot(s) 1 not connected " - "to an output slot on FullyConnected layer \"Fully_Connected\"") == 0); - } + CHECK_THROWS_AS(Optimize(*network, backends, runtime->GetDeviceSpec()), LayerValidationException); } else if (!connectedBias) { @@ -429,16 +423,7 @@ void FullyConnectedErrorChecking(const std::vector& backends, IRuntime::CreationOptions options; IRuntimePtr runtime(IRuntime::Create(options)); - try - { - Optimize(*network, backends, runtime->GetDeviceSpec()); - FAIL("LayerValidationException should have been thrown"); - } - catch (const LayerValidationException& exc) - { - CHECK(strcmp(exc.what(), "Fully_Connected layer bias not set: Input slot(s) 2 not connected " - "to an output slot on FullyConnected layer \"Fully_Connected\"") == 0); - } + CHECK_THROWS_AS(Optimize(*network, backends, runtime->GetDeviceSpec()), LayerValidationException); } } else if(!connectedWeights && !connectedBias) @@ -452,17 +437,7 @@ void FullyConnectedErrorChecking(const std::vector& backends, IRuntime::CreationOptions options; IRuntimePtr runtime(IRuntime::Create(options)); - try - { - Optimize(*network, backends, runtime->GetDeviceSpec()); - FAIL("LayerValidationException should have been thrown"); - } - catch (const LayerValidationException& exc) - { - CHECK(strcmp(exc.what(), "Fully_Connected layer weights and bias not set: Input slot(s) 1 & 2 not " - "connected to an output slot on FullyConnected layer \"Fully_Connected\"") == 0); - } - + CHECK_THROWS_AS(Optimize(*network, backends, runtime->GetDeviceSpec()), LayerValidationException); } else if(!tensorInfoSet) { diff --git a/src/backends/backendsCommon/test/LayerReleaseConstantDataTest.cpp b/src/backends/backendsCommon/test/LayerReleaseConstantDataTest.cpp index 56f15a51e5..5ceb8ae4b4 100644 --- a/src/backends/backendsCommon/test/LayerReleaseConstantDataTest.cpp +++ b/src/backends/backendsCommon/test/LayerReleaseConstantDataTest.cpp @@ -70,49 +70,68 @@ TEST_CASE("ReleaseBatchNormalizationLayerConstantDataTest") } +TEST_CASE("ReleaseConvolution2dLayerConstantDataTest") +{ + Graph graph; + + // create the layer we're testing + Convolution2dDescriptor layerDesc; + layerDesc.m_PadLeft = 3; + layerDesc.m_PadRight = 3; + layerDesc.m_PadTop = 1; + layerDesc.m_PadBottom = 1; + layerDesc.m_StrideX = 2; + layerDesc.m_StrideY = 4; + layerDesc.m_BiasEnabled = true; + + Convolution2dLayer* const layer = graph.AddLayer(layerDesc, "layer"); - TEST_CASE("ReleaseConvolution2dLayerConstantDataTest") - { - Graph graph; + layer->m_Weight = std::make_unique(TensorInfo({ 2, 3, 5, 3 }, + armnn::DataType::Float32)); + layer->m_Bias = std::make_unique + (TensorInfo({ 2 }, GetBiasDataType(armnn::DataType::Float32))); - // create the layer we're testing - Convolution2dDescriptor layerDesc; - layerDesc.m_PadLeft = 3; - layerDesc.m_PadRight = 3; - layerDesc.m_PadTop = 1; - layerDesc.m_PadBottom = 1; - layerDesc.m_StrideX = 2; - layerDesc.m_StrideY = 4; - layerDesc.m_BiasEnabled = true; + layer->m_Weight->Allocate(); + layer->m_Bias->Allocate(); - Convolution2dLayer* const layer = graph.AddLayer(layerDesc, "layer"); + ConstantLayer* weightsLayer = graph.AddLayer("Weights"); + ConstantLayer* biasLayer = graph.AddLayer("Bias"); - layer->m_Weight = std::make_unique(TensorInfo({2, 3, 5, 3}, - armnn::DataType::Float32)); - layer->m_Bias = std::make_unique - (TensorInfo({2}, GetBiasDataType(armnn::DataType::Float32))); + weightsLayer->m_LayerOutput = std::make_shared(TensorInfo({ 2, 3, 5, 3 }, + armnn::DataType::Float32)); - layer->m_Weight->Allocate(); - layer->m_Bias->Allocate(); + biasLayer->m_LayerOutput = std::make_shared( + TensorInfo({2}, GetBiasDataType(armnn::DataType::Float32))); - // create extra layers - Layer* const input = graph.AddLayer(0, "input"); - Layer* const output = graph.AddLayer(0, "output"); + TensorInfo weightsInfo = weightsLayer->m_LayerOutput->GetTensorInfo(); + weightsInfo.SetConstant(); + TensorInfo biasInfo = biasLayer->m_LayerOutput->GetTensorInfo(); + biasInfo.SetConstant(); - // connect up - Connect(input, layer, TensorInfo({2, 3, 8, 16}, armnn::DataType::Float32)); - Connect(layer, output, TensorInfo({2, 2, 2, 10}, armnn::DataType::Float32)); - // check the constants that they are not NULL - CHECK(layer->m_Weight != nullptr); - CHECK(layer->m_Bias != nullptr); + weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo); + biasLayer->GetOutputSlot(0).SetTensorInfo(biasInfo); - // free up the constants.. - layer->ReleaseConstantData(); + // create extra layers + Layer* const input = graph.AddLayer(0, "input"); + Layer* const output = graph.AddLayer(0, "output"); - // check the constants that they are NULL now - CHECK(layer->m_Weight == nullptr); - CHECK(layer->m_Bias == nullptr); + // connect up + Connect(input, layer, TensorInfo({ 2, 3, 8, 16 }, armnn::DataType::Float32)); + weightsLayer->GetOutputSlot().Connect(layer->GetInputSlot(1)); + biasLayer->GetOutputSlot().Connect(layer->GetInputSlot(2)); + Connect(layer, output, TensorInfo({ 2, 2, 2, 10 }, armnn::DataType::Float32)); + + // check the constants that they are not NULL + CHECK(layer->m_Weight != nullptr); + CHECK(layer->m_Bias != nullptr); + + // free up the constants.. + layer->ReleaseConstantData(); + + // check the constants that they are NULL now + CHECK(layer->m_Weight == nullptr); + CHECK(layer->m_Bias == nullptr); } TEST_CASE("ReleaseDepthwiseConvolution2dLayerConstantDataTest") @@ -131,8 +150,10 @@ TEST_CASE("ReleaseDepthwiseConvolution2dLayerConstantDataTest") DepthwiseConvolution2dLayer* const layer = graph.AddLayer(layerDesc, "layer"); - layer->m_Weight = std::make_unique(TensorInfo({3, 3, 5, 3}, DataType::Float32)); - layer->m_Bias = std::make_unique(TensorInfo({9}, DataType::Float32)); + layer->m_Weight = std::make_unique( + TensorInfo({3, 3, 5, 3}, DataType::Float32)); + layer->m_Bias = std::make_unique( + TensorInfo({9}, DataType::Float32)); layer->m_Weight->Allocate(); layer->m_Bias->Allocate(); @@ -170,10 +191,10 @@ TEST_CASE("ReleaseFullyConnectedLayerConstantDataTest") float inputsQScale = 1.0f; float outputQScale = 2.0f; - layer->m_Weight = std::make_unique(TensorInfo({7, 20}, - DataType::QAsymmU8, inputsQScale, 0)); - layer->m_Bias = std::make_unique(TensorInfo({7}, - GetBiasDataType(DataType::QAsymmU8), inputsQScale)); + layer->m_Weight = std::make_unique( + TensorInfo({7, 20}, DataType::QAsymmU8, inputsQScale, 0)); + layer->m_Bias = std::make_unique( + TensorInfo({7}, GetBiasDataType(DataType::QAsymmU8), inputsQScale)); layer->m_Weight->Allocate(); layer->m_Bias->Allocate(); diff --git a/src/backends/backendsCommon/test/OptimizationViewsTests.cpp b/src/backends/backendsCommon/test/OptimizationViewsTests.cpp index 1219ac5a33..9b86784dce 100644 --- a/src/backends/backendsCommon/test/OptimizationViewsTests.cpp +++ b/src/backends/backendsCommon/test/OptimizationViewsTests.cpp @@ -61,31 +61,35 @@ TEST_CASE("OptimizedViewsSubgraphLayerCount") Layer* const inputLayer = baseGraph.AddLayer(0, "input"); Convolution2dDescriptor convDescriptor; - PreCompiledDescriptor substitutionLayerDescriptor(1, 1); + PreCompiledDescriptor substitutionLayerDescriptor(2, 1); Layer* const convLayer1 = baseGraph.AddLayer(convDescriptor, "conv1"); Layer* const convLayer2 = baseGraph.AddLayer(convDescriptor, "conv2"); + Layer* const weightsLayer1 = baseGraph.AddLayer("weights1"); + Layer* const weightsLayer2 = baseGraph.AddLayer("weights2"); Layer* const substitutableCompiledLayer = baseGraph.AddLayer(substitutionLayerDescriptor, "pre-compiled"); Layer* const outputLayer = baseGraph.AddLayer(0, "output"); inputLayer->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(0)); + weightsLayer1->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(1)); convLayer1->GetOutputSlot(0).Connect(convLayer2->GetInputSlot(0)); + weightsLayer2->GetOutputSlot(0).Connect(convLayer2->GetInputSlot(1)); convLayer2->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); // Subgraph for a failed layer SubgraphViewSelector::SubgraphViewPtr failedSubgraph = - CreateSubgraphViewFrom(CreateInputsFrom({convLayer1}), + CreateSubgraphViewFrom(CreateInputsFrom(convLayer1), CreateOutputsFrom({convLayer1}), {convLayer1}); // Subgraph for an untouched layer SubgraphViewSelector::SubgraphViewPtr untouchedSubgraph = - CreateSubgraphViewFrom(CreateInputsFrom({convLayer2}), + CreateSubgraphViewFrom(CreateInputsFrom(convLayer2), CreateOutputsFrom({convLayer2}), {convLayer2}); // Subgraph for a substitutable layer SubgraphViewSelector::SubgraphViewPtr substitutableSubgraph = - CreateSubgraphViewFrom(CreateInputsFrom({convLayer1}), + CreateSubgraphViewFrom(CreateInputsFrom(convLayer1), CreateOutputsFrom({convLayer2}), {substitutableCompiledLayer}); // Create a Graph containing a layer to substitute in @@ -95,7 +99,7 @@ TEST_CASE("OptimizedViewsSubgraphLayerCount") // Subgraph for a substitution layer SubgraphViewSelector::SubgraphViewPtr substitutionSubgraph = - CreateSubgraphViewFrom(CreateInputsFrom({substitutionpreCompiledLayer}), + CreateSubgraphViewFrom(CreateInputsFrom(substitutionpreCompiledLayer), CreateOutputsFrom({substitutionpreCompiledLayer}), {substitutionpreCompiledLayer}); @@ -106,14 +110,14 @@ TEST_CASE("OptimizedViewsSubgraphLayerCount") view.AddUntouchedSubgraph(SubgraphView(*untouchedSubgraph)); SubgraphViewSelector::SubgraphViewPtr baseSubgraph = - CreateSubgraphViewFrom(CreateInputsFrom({convLayer1}), + CreateSubgraphViewFrom(CreateInputsFrom(convLayer1), CreateOutputsFrom({convLayer2}), {substitutionpreCompiledLayer}); view.AddSubstitution({*baseSubgraph, *substitutionSubgraph}); // Construct original subgraph to compare against SubgraphViewSelector::SubgraphViewPtr originalSubgraph = - CreateSubgraphViewFrom(CreateInputsFrom({convLayer1}), + CreateSubgraphViewFrom(CreateInputsFrom(convLayer1), CreateOutputsFrom({convLayer2}), {convLayer1, convLayer2, substitutionpreCompiledLayer}); @@ -147,11 +151,11 @@ TEST_CASE("OptimizedViewsSubgraphLayerCountUsingGetINetwork") convLayer2->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); // Subgraph for a failed layer - SubgraphViewSelector::SubgraphViewPtr failedSubgraph = CreateSubgraphViewFrom(CreateInputsFrom({convLayer1}), + SubgraphViewSelector::SubgraphViewPtr failedSubgraph = CreateSubgraphViewFrom(CreateInputsFrom(convLayer1), CreateOutputsFrom({convLayer1}), {convLayer1}); // Subgraph for an untouched layer - SubgraphViewSelector::SubgraphViewPtr untouchedSubgraph = CreateSubgraphViewFrom(CreateInputsFrom({convLayer2}), + SubgraphViewSelector::SubgraphViewPtr untouchedSubgraph = CreateSubgraphViewFrom(CreateInputsFrom(convLayer2), CreateOutputsFrom({convLayer2}), {convLayer2}); @@ -162,21 +166,21 @@ TEST_CASE("OptimizedViewsSubgraphLayerCountUsingGetINetwork") // Subgraph for a substitution layer SubgraphViewSelector::SubgraphViewPtr substitutionSubgraph = - CreateSubgraphViewFrom(CreateInputsFrom({substitutionpreCompiledLayer}), + CreateSubgraphViewFrom(CreateInputsFrom(substitutionpreCompiledLayer), CreateOutputsFrom({substitutionpreCompiledLayer}), {substitutionpreCompiledLayer}); view.AddFailedSubgraph(SubgraphView(*failedSubgraph)); view.AddUntouchedSubgraph(SubgraphView(*untouchedSubgraph)); - SubgraphViewSelector::SubgraphViewPtr baseSubgraph = CreateSubgraphViewFrom(CreateInputsFrom({convLayer1}), + SubgraphViewSelector::SubgraphViewPtr baseSubgraph = CreateSubgraphViewFrom(CreateInputsFrom(convLayer1), CreateOutputsFrom({convLayer2}), {substitutionpreCompiledLayer}); view.AddSubstitution({*baseSubgraph, *substitutionSubgraph}); // Construct original subgraph to compare against SubgraphViewSelector::SubgraphViewPtr originalSubgraph = - CreateSubgraphViewFrom(CreateInputsFrom({convLayer1}), + CreateSubgraphViewFrom(CreateInputsFrom(convLayer1), CreateOutputsFrom({convLayer2}), {convLayer1, convLayer2, substitutionpreCompiledLayer}); @@ -192,26 +196,31 @@ TEST_CASE("OptimizedViewsSubgraphLayerCountFailValidate") Layer* const inputLayer = baseGraph.AddLayer(0, "input"); Convolution2dDescriptor convDescriptor; - PreCompiledDescriptor substitutionLayerDescriptor(1, 1); + PreCompiledDescriptor substitutionLayerDescriptor(2, 1); Layer* const convLayer1 = baseGraph.AddLayer(convDescriptor, "conv1"); Layer* const convLayer2 = baseGraph.AddLayer(convDescriptor, "conv2"); + Layer* const weightsLayer1 = baseGraph.AddLayer("weights1"); + Layer* const weightsLayer2 = baseGraph.AddLayer("weights2"); Layer* const substitutableCompiledLayer = baseGraph.AddLayer(substitutionLayerDescriptor, "pre-compiled"); Layer* const outputLayer = baseGraph.AddLayer(0, "output"); + inputLayer->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(0)); + weightsLayer1->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(1)); convLayer1->GetOutputSlot(0).Connect(convLayer2->GetInputSlot(0)); + weightsLayer2->GetOutputSlot(0).Connect(convLayer2->GetInputSlot(1)); convLayer2->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); // Subgraph for an untouched layer SubgraphViewSelector::SubgraphViewPtr untouchedSubgraph = - CreateSubgraphViewFrom(CreateInputsFrom({convLayer2}), + CreateSubgraphViewFrom(CreateInputsFrom(convLayer2), CreateOutputsFrom({convLayer2}), {convLayer2}); // Subgraph for a substitutable layer SubgraphViewSelector::SubgraphViewPtr substitutableSubgraph = - CreateSubgraphViewFrom(CreateInputsFrom({convLayer1}), + CreateSubgraphViewFrom(CreateInputsFrom(convLayer1), CreateOutputsFrom({convLayer2}), {substitutableCompiledLayer}); // Create a Graph containing a layer to substitute in @@ -221,7 +230,7 @@ TEST_CASE("OptimizedViewsSubgraphLayerCountFailValidate") // Subgraph for a substitution layer SubgraphViewSelector::SubgraphViewPtr substitutionSubgraph = - CreateSubgraphViewFrom(CreateInputsFrom({substitutionpreCompiledLayer}), + CreateSubgraphViewFrom(CreateInputsFrom(substitutionpreCompiledLayer), CreateOutputsFrom({substitutionpreCompiledLayer}), {substitutionpreCompiledLayer}); @@ -231,14 +240,14 @@ TEST_CASE("OptimizedViewsSubgraphLayerCountFailValidate") view.AddUntouchedSubgraph(SubgraphView(*untouchedSubgraph)); SubgraphViewSelector::SubgraphViewPtr baseSubgraph = - CreateSubgraphViewFrom(CreateInputsFrom({convLayer1}), + CreateSubgraphViewFrom(CreateInputsFrom(convLayer1), CreateOutputsFrom({convLayer2}), {substitutionpreCompiledLayer}); view.AddSubstitution({*baseSubgraph, *substitutionSubgraph}); // Construct original subgraph to compare against SubgraphViewSelector::SubgraphViewPtr originalSubgraph = - CreateSubgraphViewFrom(CreateInputsFrom({convLayer1}), + CreateSubgraphViewFrom(CreateInputsFrom(convLayer1), CreateOutputsFrom({convLayer2}), {convLayer1, convLayer2, substitutionpreCompiledLayer}); diff --git a/src/backends/backendsCommon/test/OptimizeSubgraphViewTests.cpp b/src/backends/backendsCommon/test/OptimizeSubgraphViewTests.cpp index ad59704e2a..45fcf19f90 100644 --- a/src/backends/backendsCommon/test/OptimizeSubgraphViewTests.cpp +++ b/src/backends/backendsCommon/test/OptimizeSubgraphViewTests.cpp @@ -106,6 +106,21 @@ Convolution2dLayer* AddConvolutionLayer(Graph& graph, return convLayer; } +// Convenience function to add a constant layer to a graph +ConstantLayer* AddConstantLayer(Graph& graph, + LayerNameToLayerMap& layersInGraph, + const std::string& layerName, + const ConstTensor& constTensor, + const TensorInfo& outputInfo) +{ + ConstantLayer* const constantLayer = graph.AddLayer(layerName.c_str()); + CHECK(constantLayer); + constantLayer->m_LayerOutput = std::make_shared(constTensor); + constantLayer->GetOutputSlot(0).SetTensorInfo(outputInfo); + layersInGraph.insert(std::make_pair(constantLayer->GetName(), constantLayer)); + return constantLayer; +} + // Convenience function to add a pooling layer to a graph Pooling2dLayer* AddPoolingLayer(Graph& graph, LayerNameToLayerMap& layersInGraph, @@ -246,7 +261,7 @@ SubgraphView::SubgraphViewPtr BuildFullyUnsupportedSubgraph1(Graph& graph, Layer poolingLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); // Create the subgraph view for the whole network - return CreateSubgraphViewFrom(CreateInputsFrom({poolingLayer}), + return CreateSubgraphViewFrom(CreateInputsFrom(poolingLayer), CreateOutputsFrom({poolingLayer}), {poolingLayer}); } @@ -287,7 +302,7 @@ SubgraphView::SubgraphViewPtr BuildFullyUnsupportedSubgraph2(Graph& graph, Layer pooling3Layer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); // Create the subgraph view for the whole network - return CreateSubgraphViewFrom(CreateInputsFrom({pooling1Layer}), + return CreateSubgraphViewFrom(CreateInputsFrom(pooling1Layer), CreateOutputsFrom({pooling3Layer}), {pooling1Layer, pooling2Layer, @@ -299,8 +314,11 @@ SubgraphView::SubgraphViewPtr BuildFullyOptimizableSubgraph1(Graph& graph, Layer { const TensorInfo inputInfo ({ 1, 16, 16, 16 }, DataType::QAsymmU8, 1.0f, 0); const TensorInfo outputInfo({ 1, 16, 16, 16 }, DataType::QAsymmU8, 1.0f, 0); - const TensorInfo weightInfo({ 16, 1, 1, 16 }, DataType::QAsymmU8, 0.9f, 0); - const TensorInfo biasInfo ({ 1, 1, 1, 16 }, DataType::Signed32, 0.9f, 0); + TensorInfo weightInfo({ 16, 1, 1, 16 }, DataType::QAsymmU8, 0.9f, 0); + TensorInfo biasInfo ({ 1, 1, 1, 16 }, DataType::Signed32, 0.9f, 0); + + weightInfo.SetConstant(true); + biasInfo.SetConstant(true); Convolution2dDescriptor convolutionDescriptor; convolutionDescriptor.m_StrideX = 1; @@ -308,20 +326,34 @@ SubgraphView::SubgraphViewPtr BuildFullyOptimizableSubgraph1(Graph& graph, Layer convolutionDescriptor.m_BiasEnabled = true; convolutionDescriptor.m_DataLayout = DataLayout::NHWC; + std::vector weightsVector(64); + ConstTensor constWeightsTensor(weightInfo, weightsVector); + + std::vector biasVector(16); + ConstTensor constBiasTensor(biasInfo, biasVector); + // Construct the graph Layer* const inputLayer = AddInputLayer(graph, "input layer", inputInfo); Convolution2dLayer* const convLayer = AddConvolutionLayer(graph, layersInGraph, convolutionDescriptor, "conv layer", weightInfo, biasInfo, outputInfo); + + ConstantLayer* const weightsLayer = + AddConstantLayer(graph, layersInGraph, "Weights Layer", constWeightsTensor, outputInfo); + ConstantLayer* const biasLayer = AddConstantLayer(graph, layersInGraph, "Bias Layer", constBiasTensor, outputInfo); + Layer* const outputLayer = AddOutputLayer(graph, "output layer"); // Connect the network inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0)); + weightsLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(1)); + biasLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(2)); convLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + std::vector ignoreSlots = {1, 2}; // Create the subgraph view for the whole network - return CreateSubgraphViewFrom(CreateInputsFrom({convLayer}), + return CreateSubgraphViewFrom(CreateInputsFrom(convLayer, ignoreSlots), CreateOutputsFrom({convLayer}), - {convLayer}); + {convLayer, weightsLayer, biasLayer}); } // Creates a subgraph with five convolutions layers, all supported by the mock backend @@ -329,8 +361,18 @@ SubgraphView::SubgraphViewPtr BuildFullyOptimizableSubgraph2(Graph& graph, Layer { const TensorInfo inputInfo ({ 1, 16, 16, 16 }, DataType::QAsymmU8, 1.0f, 0); const TensorInfo outputInfo({ 1, 16, 16, 16 }, DataType::QAsymmU8, 1.0f, 0); - const TensorInfo weightInfo({ 16, 1, 1, 16 }, DataType::QAsymmU8, 0.9f, 0); - const TensorInfo biasInfo ({ 1, 1, 1, 16 }, DataType::Signed32, 0.9f, 0); + TensorInfo weightInfo({ 16, 1, 1, 16 }, DataType::QAsymmU8, 0.9f, 0); + TensorInfo biasInfo ({ 1, 1, 1, 16 }, DataType::Signed32, 0.9f, 0); + + weightInfo.SetConstant(true); + biasInfo.SetConstant(true); + + std::vector weightsVector(64); + ConstTensor constWeightsTensor(weightInfo, weightsVector); + + std::vector biasVector(16); + ConstTensor constBiasTensor(biasInfo, biasVector); + Convolution2dDescriptor convolutionDescriptor; convolutionDescriptor.m_StrideX = 1; @@ -342,32 +384,84 @@ SubgraphView::SubgraphViewPtr BuildFullyOptimizableSubgraph2(Graph& graph, Layer Layer* const inputLayer = AddInputLayer(graph, "input layer", inputInfo); Convolution2dLayer* const conv1Layer = AddConvolutionLayer(graph, layersInGraph, convolutionDescriptor, "conv1 layer", weightInfo, biasInfo, outputInfo); + ConstantLayer* const weightsLayer1 = + AddConstantLayer(graph, layersInGraph, "Weights Layer 1", constWeightsTensor, outputInfo); + ConstantLayer* const biasLayer1 = + AddConstantLayer(graph, layersInGraph, "Bias Layer 1", constBiasTensor, outputInfo); + Convolution2dLayer* const conv2Layer = AddConvolutionLayer(graph, layersInGraph, convolutionDescriptor, "conv2 layer", weightInfo, biasInfo, outputInfo); + ConstantLayer* const weightsLayer2 = + AddConstantLayer(graph, layersInGraph, "Weights Layer 2", constWeightsTensor, outputInfo); + ConstantLayer* const biasLayer2 = + AddConstantLayer(graph, layersInGraph, "Bias Layer 2", constBiasTensor, outputInfo); + + Convolution2dLayer* const conv3Layer = AddConvolutionLayer(graph, layersInGraph, convolutionDescriptor, "conv3 layer", weightInfo, biasInfo, outputInfo); + ConstantLayer* const weightsLayer3 = + AddConstantLayer(graph, layersInGraph, "Weights Layer 3", constWeightsTensor, outputInfo); + ConstantLayer* const biasLayer3 = + AddConstantLayer(graph, layersInGraph, "Bias Layer 3", constBiasTensor, outputInfo); + Convolution2dLayer* const conv4Layer = AddConvolutionLayer(graph, layersInGraph, convolutionDescriptor, "conv4 layer", weightInfo, biasInfo, outputInfo); + ConstantLayer* const weightsLayer4 = + AddConstantLayer(graph, layersInGraph, "Weights Layer 4", constWeightsTensor, outputInfo); + ConstantLayer* const biasLayer4 = + AddConstantLayer(graph, layersInGraph, "Bias Layer 4", constBiasTensor, outputInfo); + Convolution2dLayer* const conv5Layer = AddConvolutionLayer(graph, layersInGraph, convolutionDescriptor, "conv5 layer", weightInfo, biasInfo, outputInfo); + ConstantLayer* const weightsLayer5 = + AddConstantLayer(graph, layersInGraph, "Weights Layer 5", constWeightsTensor, outputInfo); + ConstantLayer* const biasLayer5 = + AddConstantLayer(graph, layersInGraph, "Bias Layer 5", constBiasTensor, outputInfo); + + Layer* const outputLayer = AddOutputLayer(graph, "output layer"); // Connect the network inputLayer->GetOutputSlot(0).Connect(conv1Layer->GetInputSlot(0)); + weightsLayer1->GetOutputSlot(0).Connect(conv1Layer->GetInputSlot(1)); + biasLayer1->GetOutputSlot(0).Connect(conv1Layer->GetInputSlot(2)); + conv1Layer->GetOutputSlot(0).Connect(conv2Layer->GetInputSlot(0)); + weightsLayer2->GetOutputSlot(0).Connect(conv2Layer->GetInputSlot(1)); + biasLayer2->GetOutputSlot(0).Connect(conv2Layer->GetInputSlot(2)); + conv2Layer->GetOutputSlot(0).Connect(conv3Layer->GetInputSlot(0)); + weightsLayer3->GetOutputSlot(0).Connect(conv3Layer->GetInputSlot(1)); + biasLayer3->GetOutputSlot(0).Connect(conv3Layer->GetInputSlot(2)); + conv3Layer->GetOutputSlot(0).Connect(conv4Layer->GetInputSlot(0)); + weightsLayer4->GetOutputSlot(0).Connect(conv4Layer->GetInputSlot(1)); + biasLayer4->GetOutputSlot(0).Connect(conv4Layer->GetInputSlot(2)); + conv4Layer->GetOutputSlot(0).Connect(conv5Layer->GetInputSlot(0)); - conv5Layer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + weightsLayer5->GetOutputSlot(0).Connect(conv5Layer->GetInputSlot(1)); + biasLayer5->GetOutputSlot(0).Connect(conv5Layer->GetInputSlot(2)); + conv5Layer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + std::vector ignoreSlots = {1, 2}; // Create the subgraph view for the whole network - return CreateSubgraphViewFrom(CreateInputsFrom({conv1Layer}), - CreateOutputsFrom({conv5Layer}), - {conv1Layer, - conv2Layer, - conv3Layer, - conv4Layer, - conv5Layer}); + return CreateSubgraphViewFrom(CreateInputsFrom(conv1Layer, ignoreSlots), + CreateOutputsFrom({ conv5Layer }), + { weightsLayer1, + biasLayer1, + conv1Layer, + weightsLayer2, + biasLayer2, + conv2Layer, + weightsLayer3, + biasLayer3, + conv3Layer, + weightsLayer4, + biasLayer4, + conv4Layer, + weightsLayer5, + biasLayer5, + conv5Layer }); } // Creates a subgraph with both supported and unsupported layers @@ -376,8 +470,17 @@ SubgraphView::SubgraphViewPtr BuildPartiallySupportedSubgraph(Graph& graph, Laye { const TensorInfo inputInfo ({ 1, 16, 16, 16 }, DataType::QAsymmU8, 1.0f, 0); const TensorInfo outputInfo({ 1, 16, 16, 16 }, DataType::QAsymmU8, 1.0f, 0); - const TensorInfo weightInfo({ 16, 1, 1, 16 }, DataType::QAsymmU8, 0.9f, 0); - const TensorInfo biasInfo ({ 1, 1, 1, 16 }, DataType::Signed32, 0.9f, 0); + TensorInfo weightInfo({ 16, 1, 1, 16 }, DataType::QAsymmU8, 0.9f, 0); + TensorInfo biasInfo ({ 1, 1, 1, 16 }, DataType::Signed32, 0.9f, 0); + + weightInfo.SetConstant(true); + biasInfo.SetConstant(true); + + std::vector weightsVector(64); + ConstTensor constWeightsTensor(weightInfo, weightsVector); + + std::vector biasVector(16); + ConstTensor constBiasTensor(biasInfo, biasVector); Convolution2dDescriptor convolutionDescriptor; convolutionDescriptor.m_StrideX = 1; @@ -400,12 +503,25 @@ SubgraphView::SubgraphViewPtr BuildPartiallySupportedSubgraph(Graph& graph, Laye // Construct the graph Layer* const inputLayer = AddInputLayer(graph, "input layer", inputInfo); + ConstantLayer* const weightsLayer1 = + AddConstantLayer(graph, layersInGraph, "Weights Layer 1", constWeightsTensor, outputInfo); + + ConstantLayer* const biasLayer1 = + AddConstantLayer(graph, layersInGraph, "Bias Layer 1", constBiasTensor, outputInfo); + Convolution2dLayer* const conv1Layer = AddConvolutionLayer(graph, layersInGraph, convolutionDescriptor, "conv1 layer", weightInfo, biasInfo, outputInfo); Pooling2dLayer* const pooling1Layer = AddPoolingLayer(graph, layersInGraph, poolingDescriptor, "pooling1 layer", outputInfo); Pooling2dLayer* const pooling2Layer = AddPoolingLayer(graph, layersInGraph, poolingDescriptor, "pooling2 layer", outputInfo); + + ConstantLayer* const weightsLayer2 = + AddConstantLayer(graph, layersInGraph, "Weights Layer 2", constWeightsTensor, outputInfo); + + ConstantLayer* const biasLayer2 = + AddConstantLayer(graph, layersInGraph, "Bias Layer 2", constBiasTensor, outputInfo); + Convolution2dLayer* const conv2Layer = AddConvolutionLayer(graph, layersInGraph, convolutionDescriptor, "conv2 layer", weightInfo, biasInfo, outputInfo); Pooling2dLayer* const pooling3Layer = AddPoolingLayer(graph, layersInGraph, poolingDescriptor, @@ -414,18 +530,27 @@ SubgraphView::SubgraphViewPtr BuildPartiallySupportedSubgraph(Graph& graph, Laye // Connect the network inputLayer->GetOutputSlot(0).Connect(conv1Layer->GetInputSlot(0)); + weightsLayer1->GetOutputSlot(0).Connect(conv1Layer->GetInputSlot(1)); + biasLayer1->GetOutputSlot(0).Connect(conv1Layer->GetInputSlot(2)); conv1Layer->GetOutputSlot(0).Connect(pooling1Layer->GetInputSlot(0)); pooling1Layer->GetOutputSlot(0).Connect(pooling2Layer->GetInputSlot(0)); pooling2Layer->GetOutputSlot(0).Connect(conv2Layer->GetInputSlot(0)); + weightsLayer2->GetOutputSlot(0).Connect(conv2Layer->GetInputSlot(1)); + biasLayer2->GetOutputSlot(0).Connect(conv2Layer->GetInputSlot(2)); conv2Layer->GetOutputSlot(0).Connect(pooling3Layer->GetInputSlot(0)); pooling3Layer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + std::vector ignoreSlots = {1, 2}; // Create the subgraph view for the whole network - return CreateSubgraphViewFrom(CreateInputsFrom({conv1Layer}), + return CreateSubgraphViewFrom(CreateInputsFrom(conv1Layer, ignoreSlots), CreateOutputsFrom({pooling3Layer}), - {conv1Layer, + {weightsLayer1, + biasLayer1, + conv1Layer, pooling1Layer, pooling2Layer, + weightsLayer2, + biasLayer2, conv2Layer, pooling3Layer}); } @@ -435,9 +560,17 @@ SubgraphView::SubgraphViewPtr BuildFullyUnoptimizableSubgraph1(Graph& graph, Lay { const TensorInfo inputInfo ({ 1, 16, 16, 16 }, DataType::QAsymmU8, 1.0f, 0); const TensorInfo outputInfo({ 1, 16, 16, 16 }, DataType::QAsymmU8, 1.0f, 0); - const TensorInfo weightInfo({ 16, 1, 1, 16 }, DataType::QAsymmU8, 0.9f, 0); - const TensorInfo biasInfo ({ 1, 1, 1, 16 }, DataType::Signed32, 0.9f, 0); + TensorInfo weightInfo({ 16, 1, 1, 16 }, DataType::QAsymmU8, 0.9f, 0); + TensorInfo biasInfo ({ 1, 1, 1, 16 }, DataType::Signed32, 0.9f, 0); + weightInfo.SetConstant(true); + biasInfo.SetConstant(true); + + std::vector weightsVector(64); + ConstTensor constWeightsTensor(weightInfo, weightsVector); + + std::vector biasVector(16); + ConstTensor constBiasTensor(biasInfo, biasVector); Convolution2dDescriptor convolutionDescriptor; convolutionDescriptor.m_StrideX = 1; convolutionDescriptor.m_StrideY = 1; @@ -446,6 +579,13 @@ SubgraphView::SubgraphViewPtr BuildFullyUnoptimizableSubgraph1(Graph& graph, Lay // Construct the graph Layer* const inputLayer = AddInputLayer(graph, "input layer", inputInfo); + + ConstantLayer* const weightsLayer = + AddConstantLayer(graph, layersInGraph, "Weights Layer unoptimizable", constWeightsTensor, outputInfo); + + ConstantLayer* const biasLayer = + AddConstantLayer(graph, layersInGraph, "Bias Layer unoptimizable", constBiasTensor, outputInfo); + Convolution2dLayer* const convLayer = AddConvolutionLayer(graph, layersInGraph, convolutionDescriptor, "conv layer unoptimizable", weightInfo, biasInfo, outputInfo); @@ -453,12 +593,15 @@ SubgraphView::SubgraphViewPtr BuildFullyUnoptimizableSubgraph1(Graph& graph, Lay // Connect the network inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0)); + weightsLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(1)); + biasLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(2)); convLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + std::vector ignoreSlots = {1, 2}; // Create the subgraph view for the whole network - return CreateSubgraphViewFrom(CreateInputsFrom({convLayer}), + return CreateSubgraphViewFrom(CreateInputsFrom(convLayer, ignoreSlots), CreateOutputsFrom({convLayer}), - {convLayer}); + {convLayer, weightsLayer, biasLayer}); } // Creates a subgraph with some unoptimizable layers ("unoptimizable" is added to the layer's name) @@ -466,8 +609,17 @@ SubgraphView::SubgraphViewPtr BuildPartiallyOptimizableSubgraph1(Graph& graph, L { const TensorInfo inputInfo ({ 1, 16, 16, 16 }, DataType::QAsymmU8, 1.0f, 0); const TensorInfo outputInfo({ 1, 16, 16, 16 }, DataType::QAsymmU8, 1.0f, 0); - const TensorInfo weightInfo({ 16, 1, 1, 16 }, DataType::QAsymmU8, 0.9f, 0); - const TensorInfo biasInfo ({ 1, 1, 1, 16 }, DataType::Signed32, 0.9f, 0); + TensorInfo weightInfo({ 16, 1, 1, 16 }, DataType::QAsymmU8, 0.9f, 0); + TensorInfo biasInfo ({ 1, 1, 1, 16 }, DataType::Signed32, 0.9f, 0); + + weightInfo.SetConstant(true); + biasInfo.SetConstant(true); + + std::vector weightsVector(64); + ConstTensor constWeightsTensor(weightInfo, weightsVector); + + std::vector biasVector(16); + ConstTensor constBiasTensor(biasInfo, biasVector); Convolution2dDescriptor convolutionDescriptor; convolutionDescriptor.m_StrideX = 1; @@ -477,36 +629,93 @@ SubgraphView::SubgraphViewPtr BuildPartiallyOptimizableSubgraph1(Graph& graph, L // Construct the graph Layer* const inputLayer = AddInputLayer(graph, "input layer", inputInfo); - Convolution2dLayer* const conv1Layer = AddConvolutionLayer(graph, layersInGraph, convolutionDescriptor, - "conv1 layer", weightInfo, biasInfo, outputInfo); - Convolution2dLayer* const conv2Layer = AddConvolutionLayer(graph, layersInGraph, convolutionDescriptor, - "conv2 layer unoptimizable", weightInfo, biasInfo, - outputInfo); - Convolution2dLayer* const conv3Layer = AddConvolutionLayer(graph, layersInGraph, convolutionDescriptor, - "conv3 layer", weightInfo, biasInfo, outputInfo); - Convolution2dLayer* const conv4Layer = AddConvolutionLayer(graph, layersInGraph, convolutionDescriptor, - "conv4 layer unoptimizable", weightInfo, biasInfo, - outputInfo); - Convolution2dLayer* const conv5Layer = AddConvolutionLayer(graph, layersInGraph, convolutionDescriptor, - "conv5 layer", weightInfo, biasInfo, outputInfo); - Layer* const outputLayer = AddOutputLayer(graph, "output layer"); + + ConstantLayer* const weightsLayer1 = + AddConstantLayer(graph, layersInGraph, "Weights Layer 1", constWeightsTensor, outputInfo); + ConstantLayer* const biasLayer1 = + AddConstantLayer(graph, layersInGraph, "Bias Layer 1", constBiasTensor, outputInfo); + ConstantLayer* const weightsLayer2 = + AddConstantLayer(graph, layersInGraph, "Weights Layer 2 unoptimizable", constWeightsTensor, outputInfo); + ConstantLayer* const biasLayer2 = + AddConstantLayer(graph, layersInGraph, "Bias Layer 2 unoptimizable", constBiasTensor, outputInfo); + ConstantLayer* const weightsLayer3 = + AddConstantLayer(graph, layersInGraph, "Weights Layer 3", constWeightsTensor, outputInfo); + ConstantLayer* const biasLayer3 = + AddConstantLayer(graph, layersInGraph, "Bias Layer 3", constBiasTensor, outputInfo); + ConstantLayer* const weightsLayer4 = + AddConstantLayer(graph, layersInGraph, "Weights Layer 4 unoptimizable", constWeightsTensor, outputInfo); + ConstantLayer* const biasLayer4 = + AddConstantLayer(graph, layersInGraph, "Bias Layer 4 unoptimizable", constBiasTensor, outputInfo); + ConstantLayer* const weightsLayer5 = + AddConstantLayer(graph, layersInGraph, "Weights Layer 5", constWeightsTensor, outputInfo); + ConstantLayer* const biasLayer5 = + AddConstantLayer(graph, layersInGraph, "Bias Layer 5", constBiasTensor, outputInfo); + + Convolution2dLayer* const conv1Layer = AddConvolutionLayer(graph, layersInGraph, convolutionDescriptor, + "conv1 layer", weightInfo, biasInfo, outputInfo); + Convolution2dLayer* const conv2Layer = AddConvolutionLayer(graph, + layersInGraph, + convolutionDescriptor, + "conv2 layer unoptimizable", + weightInfo, + biasInfo, + outputInfo); + Convolution2dLayer* const conv3Layer = AddConvolutionLayer(graph, layersInGraph, convolutionDescriptor, + "conv3 layer", weightInfo, biasInfo, outputInfo); + Convolution2dLayer* const conv4Layer = AddConvolutionLayer(graph, + layersInGraph, + convolutionDescriptor, + "conv4 layer unoptimizable", + weightInfo, + biasInfo, + outputInfo); + Convolution2dLayer* const conv5Layer = AddConvolutionLayer(graph, layersInGraph, convolutionDescriptor, + "conv5 layer", weightInfo, biasInfo, outputInfo); + + Layer* const outputLayer = AddOutputLayer(graph, "output layer"); // Connect the network inputLayer->GetOutputSlot(0).Connect(conv1Layer->GetInputSlot(0)); + weightsLayer1->GetOutputSlot(0).Connect(conv1Layer->GetInputSlot(1)); + biasLayer1->GetOutputSlot(0).Connect(conv1Layer->GetInputSlot(2)); + conv1Layer->GetOutputSlot(0).Connect(conv2Layer->GetInputSlot(0)); + weightsLayer2->GetOutputSlot(0).Connect(conv2Layer->GetInputSlot(1)); + biasLayer2->GetOutputSlot(0).Connect(conv2Layer->GetInputSlot(2)); + conv2Layer->GetOutputSlot(0).Connect(conv3Layer->GetInputSlot(0)); + weightsLayer3->GetOutputSlot(0).Connect(conv3Layer->GetInputSlot(1)); + biasLayer3->GetOutputSlot(0).Connect(conv3Layer->GetInputSlot(2)); + conv3Layer->GetOutputSlot(0).Connect(conv4Layer->GetInputSlot(0)); + weightsLayer4->GetOutputSlot(0).Connect(conv4Layer->GetInputSlot(1)); + biasLayer4->GetOutputSlot(0).Connect(conv4Layer->GetInputSlot(2)); + conv4Layer->GetOutputSlot(0).Connect(conv5Layer->GetInputSlot(0)); + weightsLayer5->GetOutputSlot(0).Connect(conv5Layer->GetInputSlot(1)); + biasLayer5->GetOutputSlot(0).Connect(conv5Layer->GetInputSlot(2)); + conv5Layer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + std::vector ignoreSlots = {1, 2}; // Create the subgraph view for the whole network - return CreateSubgraphViewFrom(CreateInputsFrom({conv1Layer}), + return CreateSubgraphViewFrom(CreateInputsFrom(conv1Layer, ignoreSlots), CreateOutputsFrom({conv5Layer}), - {conv1Layer, - conv2Layer, - conv3Layer, - conv4Layer, - conv5Layer}); + {weightsLayer1, + biasLayer1, + conv1Layer, + weightsLayer2, + biasLayer2, + conv2Layer, + weightsLayer3, + biasLayer3, + conv3Layer, + weightsLayer4, + biasLayer4, + conv4Layer, + weightsLayer5, + biasLayer5, + conv5Layer}); } // Creates a subgraph with some input unoptimizable layers ("unoptimizable" is added to the layer's name), @@ -515,8 +724,17 @@ SubgraphView::SubgraphViewPtr BuildPartiallyOptimizableSubgraph2(Graph& graph, L { const TensorInfo inputInfo ({ 1, 16, 16, 16 }, DataType::QAsymmU8, 1.0f, 0); const TensorInfo outputInfo({ 1, 16, 16, 16 }, DataType::QAsymmU8, 1.0f, 0); - const TensorInfo weightInfo({ 16, 1, 1, 16 }, DataType::QAsymmU8, 0.9f, 0); - const TensorInfo biasInfo ({ 1, 1, 1, 16 }, DataType::Signed32, 0.9f, 0); + TensorInfo weightInfo({ 16, 1, 1, 16 }, DataType::QAsymmU8, 0.9f, 0); + TensorInfo biasInfo ({ 1, 1, 1, 16 }, DataType::Signed32, 0.9f, 0); + + weightInfo.SetConstant(true); + biasInfo.SetConstant(true); + + std::vector weightsVector(64); + ConstTensor constWeightsTensor(weightInfo, weightsVector); + + std::vector biasVector(16); + ConstTensor constBiasTensor(biasInfo, biasVector); Convolution2dDescriptor convolutionDescriptor; convolutionDescriptor.m_StrideX = 1; @@ -527,6 +745,20 @@ SubgraphView::SubgraphViewPtr BuildPartiallyOptimizableSubgraph2(Graph& graph, L // Construct the graph Layer* const input1Layer = AddInputLayer(graph, "input1 layer", inputInfo, 0); Layer* const input2Layer = AddInputLayer(graph, "input2 layer", inputInfo, 1); + + ConstantLayer* const weightsLayer1 = + AddConstantLayer(graph, layersInGraph, "Weights Layer 1", constWeightsTensor, outputInfo); + ConstantLayer* const biasLayer1 = + AddConstantLayer(graph, layersInGraph, "Bias Layer 1", constBiasTensor, outputInfo); + ConstantLayer* const weightsLayer2 = + AddConstantLayer(graph, layersInGraph, "Weights Layer 2 unoptimizable", constWeightsTensor, outputInfo); + ConstantLayer* const biasLayer2 = + AddConstantLayer(graph, layersInGraph, "Bias Layer 2 unoptimizable", constBiasTensor, outputInfo); + ConstantLayer* const weightsLayer3 = + AddConstantLayer(graph, layersInGraph, "Weights Layer 3", constWeightsTensor, outputInfo); + ConstantLayer* const biasLayer3 = + AddConstantLayer(graph, layersInGraph, "Bias Layer 3", constBiasTensor, outputInfo); + Convolution2dLayer* const conv1Layer = AddConvolutionLayer(graph, layersInGraph, convolutionDescriptor, "conv1 layer", weightInfo, biasInfo, outputInfo); Convolution2dLayer* const conv2Layer = AddConvolutionLayer(graph, layersInGraph, convolutionDescriptor, @@ -539,20 +771,35 @@ SubgraphView::SubgraphViewPtr BuildPartiallyOptimizableSubgraph2(Graph& graph, L // Connect the network input1Layer->GetOutputSlot(0).Connect(conv1Layer->GetInputSlot(0)); - input2Layer->GetOutputSlot(0).Connect(conv2Layer->GetInputSlot(0)); + weightsLayer1->GetOutputSlot(0).Connect(conv1Layer->GetInputSlot(1)); + biasLayer1->GetOutputSlot(0).Connect(conv1Layer->GetInputSlot(2)); conv1Layer->GetOutputSlot(0).Connect(addLayer->GetInputSlot(0)); + + input2Layer->GetOutputSlot(0).Connect(conv2Layer->GetInputSlot(0)); + weightsLayer2->GetOutputSlot(0).Connect(conv2Layer->GetInputSlot(1)); + biasLayer2->GetOutputSlot(0).Connect(conv2Layer->GetInputSlot(2)); conv2Layer->GetOutputSlot(0).Connect(conv3Layer->GetInputSlot(0)); + weightsLayer3->GetOutputSlot(0).Connect(conv3Layer->GetInputSlot(1)); + biasLayer3->GetOutputSlot(0).Connect(conv3Layer->GetInputSlot(2)); conv3Layer->GetOutputSlot(0).Connect(addLayer->GetInputSlot(1)); + addLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); // Create the subgraph view for the whole network + std::vector ignoreSlots = {1, 2}; return CreateSubgraphViewFrom(CreateInputsFrom({conv1Layer, - conv2Layer}), + conv2Layer}, ignoreSlots), CreateOutputsFrom({addLayer}), - {conv1Layer, - conv2Layer, - conv3Layer, - addLayer}); + { weightsLayer1, + biasLayer1, + weightsLayer2, + biasLayer2, + weightsLayer3, + biasLayer3, + conv1Layer, + conv2Layer, + conv3Layer, + addLayer }); } // The input subgraph contains only a single unsupported layer (only convolutions are unsupported by the mock backend) @@ -713,9 +960,11 @@ void FullyOptimizableSubgraphTestImpl1() CHECK(subgraphInputSlots.size() == 1); CHECK(subgraphOutputSlots.size() == 1); - CHECK(subgraphLayers.size() == 1); + CHECK(subgraphLayers.size() == 3); CHECK(Contains(layersInGraph, "conv layer")); + CHECK(Contains(layersInGraph, "Weights Layer")); + CHECK(Contains(layersInGraph, "Bias Layer")); // Create a mock backend object MockBackendInitialiser initialiser; // Register the Mock Backend @@ -776,15 +1025,25 @@ void FullyOptimizableSubgraphTestImpl2() const SubgraphView::IOutputSlots& subgraphOutputSlots = subgraphPtr->GetIOutputSlots(); const SubgraphView::IConnectableLayers& subgraphLayers = subgraphPtr->GetIConnectableLayers(); - CHECK(subgraphPtr->GetIInputSlots().size() == 1); - CHECK(subgraphPtr->GetIOutputSlots().size() == 1); - CHECK(subgraphPtr->GetIConnectableLayers().size() == 5); + CHECK(subgraphInputSlots.size() == 1); + CHECK(subgraphOutputSlots.size() == 1); + CHECK(subgraphPtr->GetIConnectableLayers().size() == 15); CHECK(Contains(layersInGraph, "conv1 layer")); CHECK(Contains(layersInGraph, "conv2 layer")); CHECK(Contains(layersInGraph, "conv3 layer")); CHECK(Contains(layersInGraph, "conv4 layer")); CHECK(Contains(layersInGraph, "conv5 layer")); + CHECK(Contains(layersInGraph, "Weights Layer 1")); + CHECK(Contains(layersInGraph, "Weights Layer 2")); + CHECK(Contains(layersInGraph, "Weights Layer 3")); + CHECK(Contains(layersInGraph, "Weights Layer 4")); + CHECK(Contains(layersInGraph, "Weights Layer 5")); + CHECK(Contains(layersInGraph, "Bias Layer 1")); + CHECK(Contains(layersInGraph, "Bias Layer 2")); + CHECK(Contains(layersInGraph, "Bias Layer 3")); + CHECK(Contains(layersInGraph, "Bias Layer 4")); + CHECK(Contains(layersInGraph, "Bias Layer 5")); // Create a mock backend object MockBackendInitialiser initialiser; // Register the Mock Backend @@ -811,20 +1070,31 @@ void FullyOptimizableSubgraphTestImpl2() const OptimizationViews::Substitutions& substitutions = optimizationViews.GetSubstitutions(); CHECK(substitutions.size() == 1); - std::list expectedSubstitutableLayers{ layersInGraph.at("conv1 layer"), + std::list expectedSubstitutableLayers{ + layersInGraph.at("Weights Layer 1"), + layersInGraph.at("Weights Layer 2"), + layersInGraph.at("Weights Layer 3"), + layersInGraph.at("Weights Layer 4"), + layersInGraph.at("Weights Layer 5"), + layersInGraph.at("Bias Layer 1"), + layersInGraph.at("Bias Layer 2"), + layersInGraph.at("Bias Layer 3"), + layersInGraph.at("Bias Layer 4"), + layersInGraph.at("Bias Layer 5"), + layersInGraph.at("conv1 layer"), layersInGraph.at("conv2 layer"), layersInGraph.at("conv3 layer"), layersInGraph.at("conv4 layer"), - layersInGraph.at("conv5 layer") }; + layersInGraph.at("conv5 layer")}; const OptimizationViews::SubstitutionPair& substitution = substitutions.at(0); - CheckSubstitution(substitution, - { subgraphInputSlots.size(), subgraphOutputSlots.size(), subgraphLayers.size() }, - { subgraphInputSlots.size(), subgraphOutputSlots.size(), 1 }, - subgraphInputSlots, - subgraphOutputSlots, - expectedSubstitutableLayers); + CheckSubstitution( + substitution, + {subgraphInputSlots.size(), subgraphOutputSlots.size(), + subgraphLayers.size()}, + {subgraphInputSlots.size(), subgraphOutputSlots.size(), 1}, + subgraphInputSlots, subgraphOutputSlots, expectedSubstitutableLayers); const SubgraphView::IConnectableLayers& substitutableSubgraphLayers = substitution.m_SubstitutableSubgraph.GetIConnectableLayers(); @@ -865,11 +1135,15 @@ void PartiallySupportedSubgraphTestImpl() CHECK(subgraphInputSlots.size() == 1); CHECK(subgraphOutputSlots.size() == 1); - CHECK(subgraphLayers.size() == 5); + CHECK(subgraphLayers.size() == 9); + CHECK(Contains(layersInGraph, "Weights Layer 1")); + CHECK(Contains(layersInGraph, "Bias Layer 1")); CHECK(Contains(layersInGraph, "conv1 layer")); CHECK(Contains(layersInGraph, "pooling1 layer")); CHECK(Contains(layersInGraph, "pooling2 layer")); + CHECK(Contains(layersInGraph, "Weights Layer 2")); + CHECK(Contains(layersInGraph, "Bias Layer 2")); CHECK(Contains(layersInGraph, "conv2 layer")); CHECK(Contains(layersInGraph, "pooling3 layer")); @@ -903,16 +1177,16 @@ void PartiallySupportedSubgraphTestImpl() s2.m_SubstitutableSubgraph.GetIConnectableLayers().front()->GetName()) < 0; }); - std::vector expectedSubstitutableSubgraphSizes{ { 1, 1, 1 }, - { 1, 1, 1 } }; + std::vector expectedSubstitutableSubgraphSizes{ { 1, 1, 3 }, + { 1, 1, 3 } }; std::vector expectedReplacementSubgraphSizes{ { 1, 1, 1 }, { 1, 1, 1 } }; std::vector expectedSubstitutableInputSlots { ConvertSlotsToISlots( - ConvertReferenceTypeToPointerType(layersInGraph.at("conv1 layer")->GetInputSlots())), + {ConvertReferenceTypeToPointerType(layersInGraph.at("conv1 layer")->GetInputSlot(0))}), ConvertSlotsToISlots( - ConvertReferenceTypeToPointerType(layersInGraph.at("conv2 layer")->GetInputSlots())) + {ConvertReferenceTypeToPointerType(layersInGraph.at("conv2 layer")->GetInputSlot(0))}) }; std::vector expectedSubstitutableOutputSlots @@ -924,8 +1198,8 @@ void PartiallySupportedSubgraphTestImpl() }; std::vector expectedSubstitutableLayers { - { layersInGraph.at("conv1 layer") }, - { layersInGraph.at("conv2 layer") } + { layersInGraph.at("Weights Layer 1"), layersInGraph.at("Bias Layer 1"), layersInGraph.at("conv1 layer") }, + { layersInGraph.at("Weights Layer 2"), layersInGraph.at("Bias Layer 2"), layersInGraph.at("conv2 layer") } }; for (size_t substitutionIndex = 0; substitutionIndex < substitutions.size(); substitutionIndex++) @@ -1005,7 +1279,7 @@ void FullyUnoptimizableSubgraphTestImpl1() CHECK(subgraphInputSlots.size() == 1); CHECK(subgraphOutputSlots.size() == 1); - CHECK(subgraphLayers.size() == 1); + CHECK(subgraphLayers.size() == 3); CHECK(Contains(layersInGraph, "conv layer unoptimizable")); @@ -1047,9 +1321,9 @@ void FullyUnoptimizableSubgraphTestImpl1() CHECK(untouchedSubgraphs.size() == 1); CheckUntouchedSubgraph(untouchedSubgraphs.at(0), - { subgraphInputSlots.size(), subgraphOutputSlots.size(), subgraphLayers.size() }, - subgraphInputSlots, - subgraphOutputSlots, + {subgraphInputSlots.size(), + subgraphOutputSlots.size(), subgraphLayers.size()}, + subgraphInputSlots, subgraphOutputSlots, subgraphLayers); } @@ -1069,7 +1343,7 @@ void PartiallyOptimizableSubgraphTestImpl1() CHECK(subgraphInputSlots.size() == 1); CHECK(subgraphOutputSlots.size() == 1); - CHECK(subgraphLayers.size() == 5); + CHECK(subgraphLayers.size() == 15); CHECK(Contains(layersInGraph, "conv1 layer")); CHECK(Contains(layersInGraph, "conv2 layer unoptimizable")); @@ -1107,20 +1381,20 @@ void PartiallyOptimizableSubgraphTestImpl1() { return strcmp(s1.m_SubstitutableSubgraph.GetIConnectableLayers().front()->GetName(), s2.m_SubstitutableSubgraph.GetIConnectableLayers().front()->GetName()) < 0; }); - std::vector expectedSubstitutableSubgraphSizes{ { 1, 1, 1 }, - { 1, 1, 1 }, - { 1, 1, 1 } }; + std::vector expectedSubstitutableSubgraphSizes{ { 1, 1, 3 }, + { 1, 1, 3 }, + { 1, 1, 3 } }; std::vector expectedReplacementSubgraphSizes{ { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 } }; std::vector expectedSubstitutableInputSlots { ConvertSlotsToISlots( - ConvertReferenceTypeToPointerType(layersInGraph.at("conv1 layer")->GetInputSlots())), + {ConvertReferenceTypeToPointerType(layersInGraph.at("conv1 layer")->GetInputSlot(0))}), ConvertSlotsToISlots( - ConvertReferenceTypeToPointerType(layersInGraph.at("conv3 layer")->GetInputSlots())), + {ConvertReferenceTypeToPointerType(layersInGraph.at("conv3 layer")->GetInputSlot(0))}), ConvertSlotsToISlots( - ConvertReferenceTypeToPointerType(layersInGraph.at("conv5 layer")->GetInputSlots())) + {ConvertReferenceTypeToPointerType(layersInGraph.at("conv5 layer")->GetInputSlot(0))}) }; std::vector expectedSubstitutableOutputSlots { @@ -1133,9 +1407,9 @@ void PartiallyOptimizableSubgraphTestImpl1() }; std::vector expectedSubstitutableLayers { - { layersInGraph.at("conv1 layer") }, - { layersInGraph.at("conv3 layer") }, - { layersInGraph.at("conv5 layer") } + { layersInGraph.at("Weights Layer 1"), layersInGraph.at("Bias Layer 1"), layersInGraph.at("conv1 layer") }, + { layersInGraph.at("Weights Layer 3"), layersInGraph.at("Bias Layer 3"), layersInGraph.at("conv3 layer") }, + { layersInGraph.at("Weights Layer 5"), layersInGraph.at("Bias Layer 5"), layersInGraph.at("conv5 layer") } }; for (size_t substitutionIndex = 0; substitutionIndex < substitutions.size(); substitutionIndex++) @@ -1166,27 +1440,33 @@ void PartiallyOptimizableSubgraphTestImpl1() s2.GetIConnectableLayers().front()->GetName()) < 0; }); - std::vector expectedUntouchedSubgraphSizes{ { 1, 1, 1 }, - { 1, 1, 1 } }; - std::vector expectedUntouchedInputSlots - { - ConvertSlotsToISlots( - ConvertReferenceTypeToPointerType(layersInGraph.at("conv2 layer unoptimizable")->GetInputSlots())), - ConvertSlotsToISlots( - ConvertReferenceTypeToPointerType(layersInGraph.at("conv4 layer unoptimizable")->GetInputSlots())) - }; + std::vector expectedUntouchedSubgraphSizes{ { 1, 1, 3 }, + { 1, 1, 3 } }; + std::vector expectedUntouchedInputSlots{ + ConvertSlotsToISlots({ConvertReferenceTypeToPointerType( + layersInGraph.at("conv2 layer unoptimizable")->GetInputSlot(0))}), + ConvertSlotsToISlots({ConvertReferenceTypeToPointerType( + layersInGraph.at("conv4 layer unoptimizable")->GetInputSlot(0))})}; + std::vector expectedUntouchedOutputSlots - { + { ConvertSlotsToISlots( - ConvertReferenceTypeToPointerType(layersInGraph.at("conv2 layer unoptimizable")->GetOutputSlots())), + ConvertReferenceTypeToPointerType(layersInGraph.at("conv2 layer unoptimizable")->GetOutputSlots())), ConvertSlotsToISlots( - ConvertReferenceTypeToPointerType(layersInGraph.at("conv4 layer unoptimizable")->GetOutputSlots())) - }; + ConvertReferenceTypeToPointerType(layersInGraph.at("conv4 layer unoptimizable")->GetOutputSlots())) + }; + std::vector expectedUntouchedLayers - { - { layersInGraph.at("conv2 layer unoptimizable") }, - { layersInGraph.at("conv4 layer unoptimizable") } - }; + { + { layersInGraph.at("Weights Layer 2 unoptimizable"), + layersInGraph.at("Bias Layer 2 unoptimizable"), + layersInGraph.at("conv2 layer unoptimizable") }, + { layersInGraph.at("Weights Layer 4 unoptimizable"), + layersInGraph.at("Bias Layer 4 unoptimizable"), + layersInGraph.at("conv4 layer unoptimizable") } + }; for (size_t untouchedIndex = 0; untouchedIndex < untouchedSubgraphs.size(); untouchedIndex++) { @@ -1215,7 +1495,7 @@ void PartiallyOptimizableSubgraphTestImpl2() CHECK(subgraphInputSlots.size() == 2); CHECK(subgraphOutputSlots.size() == 1); - CHECK(subgraphLayers.size() == 4); + CHECK(subgraphLayers.size() == 10); CHECK(Contains(layersInGraph, "conv1 layer")); CHECK(Contains(layersInGraph, "conv2 layer unoptimizable")); @@ -1247,7 +1527,7 @@ void PartiallyOptimizableSubgraphTestImpl2() const OptimizationViews::Substitutions& substitutions = optimizationViews.GetSubstitutions(); CHECK(substitutions.size() == 1); - ExpectedSubgraphSize expectedSubstitutableSubgraphSizes{ 2, 1, 3 }; + ExpectedSubgraphSize expectedSubstitutableSubgraphSizes{ 2, 1, 7 }; ExpectedSubgraphSize expectedReplacementSubgraphSizes{ 2, 1, 1 }; SubgraphView::IInputSlots expectedSubstitutableInputSlots @@ -1266,6 +1546,10 @@ void PartiallyOptimizableSubgraphTestImpl2() SubgraphView::IConnectableLayers expectedSubstitutableLayers { + layersInGraph.at("Weights Layer 1"), + layersInGraph.at("Weights Layer 3"), + layersInGraph.at("Bias Layer 1"), + layersInGraph.at("Bias Layer 3"), layersInGraph.at("conv1 layer"), layersInGraph.at("conv3 layer"), layersInGraph.at("add layer") @@ -1291,12 +1575,12 @@ void PartiallyOptimizableSubgraphTestImpl2() const OptimizationViews::Subgraphs& untouchedSubgraphs = optimizationViews.GetUntouchedSubgraphs(); CHECK(untouchedSubgraphs.size() == 1); - std::vector expectedUntouchedSubgraphSizes{ { 1, 1, 1 } }; + std::vector expectedUntouchedSubgraphSizes{ { 1, 1, 3 } }; std::vector expectedUntouchedInputSlots { - ConvertSlotsToISlots( - ConvertReferenceTypeToPointerType(layersInGraph.at("conv2 layer unoptimizable")->GetInputSlots())) - }; + ConvertSlotsToISlots({ConvertReferenceTypeToPointerType( + layersInGraph.at("conv2 layer unoptimizable")->GetInputSlot(0))})}; std::vector expectedUntouchedOutputSlots { ConvertSlotsToISlots( @@ -1304,7 +1588,8 @@ void PartiallyOptimizableSubgraphTestImpl2() }; std::vector expectedUntouchedLayers { - { layersInGraph.at("conv2 layer unoptimizable") } + { layersInGraph.at("conv2 layer unoptimizable"), layersInGraph.at("Weights Layer 2 unoptimizable"), + layersInGraph.at("Bias Layer 2 unoptimizable") } }; for (size_t untouchedIndex = 0; untouchedIndex < untouchedSubgraphs.size(); untouchedIndex++) diff --git a/src/backends/backendsCommon/test/OptimizedNetworkTests.cpp b/src/backends/backendsCommon/test/OptimizedNetworkTests.cpp index cc7974130d..8e3b275649 100644 --- a/src/backends/backendsCommon/test/OptimizedNetworkTests.cpp +++ b/src/backends/backendsCommon/test/OptimizedNetworkTests.cpp @@ -401,11 +401,14 @@ TEST_CASE("OptimizeNetworkCopy") armnn::INetworkPtr network = armnn::INetwork::Create(); armnn::IConnectableLayer* const inputLayer = network->AddInputLayer(0); + + ARMNN_NO_DEPRECATE_WARN_BEGIN armnn::IConnectableLayer* const convLayer = network->AddConvolution2dLayer(descriptor, weights, armnn::Optional(biases), layerName.c_str()); + ARMNN_NO_DEPRECATE_WARN_END armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0); inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0)); diff --git a/src/backends/backendsCommon/test/WorkloadDataValidation.cpp b/src/backends/backendsCommon/test/WorkloadDataValidation.cpp index c715d28ebe..fed21eb911 100644 --- a/src/backends/backendsCommon/test/WorkloadDataValidation.cpp +++ b/src/backends/backendsCommon/test/WorkloadDataValidation.cpp @@ -642,7 +642,7 @@ TEST_CASE("LstmQueueDescriptor_Validate") CHECK_NOTHROW(data.Validate(info)); } -TEST_CASE("BiasPerAxisQuantization_Validate") +TEST_CASE("BiasPerAxisQuantization_ValidateCorrectValues") { constexpr unsigned int nInput = 1u; constexpr unsigned int cInput = 3u; @@ -675,6 +675,7 @@ TEST_CASE("BiasPerAxisQuantization_Validate") WorkloadInfo workloadInfo; AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, nullptr); + AddInputToWorkload(queueDescriptor, workloadInfo, weightInfo, nullptr); AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, nullptr); ScopedTensorHandle weightTensor(weightInfo); @@ -687,17 +688,102 @@ TEST_CASE("BiasPerAxisQuantization_Validate") ScopedTensorHandle biasHandle1(biasInfo1); queueDescriptor.m_Bias = &biasHandle1; + AddInputToWorkload(queueDescriptor, workloadInfo, biasInfo1, nullptr); + CHECK_NOTHROW(queueDescriptor.Validate(workloadInfo)); +} - // Test 2: wrong per-axis quantization values +TEST_CASE("BiasPerAxisQuantization_ValidateIncorrectValues") +{ + constexpr unsigned int nInput = 1u; + constexpr unsigned int cInput = 3u; + constexpr unsigned int hInput = 3u; + constexpr unsigned int wInput = 3u; + + constexpr unsigned int nOutput = nInput; + constexpr unsigned int cOutput = cInput; + constexpr unsigned int hOutput = 1u; + constexpr unsigned int wOutput = 1u; + + const TensorShape inputShape { nInput, cInput, hInput, wInput }; + const TensorShape outputShape{ nOutput, cOutput, hOutput, wOutput }; + const TensorShape weightShape{ cOutput, cInput, hInput, wInput }; + const TensorShape biasShape { cOutput }; + + constexpr DataType inputType = DataType::QAsymmU8; + constexpr DataType weightType = DataType::QSymmS8; + constexpr DataType biasType = DataType::Signed32; + + constexpr float perTensorScale = 1.5f; + const TensorInfo inputInfo (inputShape, inputType, perTensorScale); + const TensorInfo outputInfo(outputShape, inputType, perTensorScale); + + const std::vector weightPerAxisScales = { 2.50f, 3.50f }; + const TensorInfo weightInfo(weightShape, weightType, weightPerAxisScales, 0); + + Convolution2dQueueDescriptor queueDescriptor; + queueDescriptor.m_Parameters.m_BiasEnabled = true; + + WorkloadInfo workloadInfo; + AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, nullptr); + AddInputToWorkload(queueDescriptor, workloadInfo, weightInfo, nullptr); + AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, nullptr); + + ScopedTensorHandle weightTensor(weightInfo); + queueDescriptor.m_Weight = &weightTensor; + + // Test 2: wrong per-axis quantization values const std::vector biasPerAxisScales2 = { 4.00f, 5.00f }; const TensorInfo biasInfo2(biasShape, biasType, biasPerAxisScales2, 0); ScopedTensorHandle biasHandle2(biasInfo2); queueDescriptor.m_Bias = &biasHandle2; + AddInputToWorkload(queueDescriptor, workloadInfo, biasInfo2, nullptr); + CHECK_NOTHROW(queueDescriptor.Validate(workloadInfo)); +} + +TEST_CASE("BiasPerAxisQuantization_ValidateInvalidArgumentException") +{ + constexpr unsigned int nInput = 1u; + constexpr unsigned int cInput = 3u; + constexpr unsigned int hInput = 3u; + constexpr unsigned int wInput = 3u; + + constexpr unsigned int nOutput = nInput; + constexpr unsigned int cOutput = cInput; + constexpr unsigned int hOutput = 1u; + constexpr unsigned int wOutput = 1u; + + const TensorShape inputShape { nInput, cInput, hInput, wInput }; + const TensorShape outputShape{ nOutput, cOutput, hOutput, wOutput }; + const TensorShape weightShape{ cOutput, cInput, hInput, wInput }; + const TensorShape biasShape { cOutput }; + + constexpr DataType inputType = DataType::QAsymmU8; + constexpr DataType weightType = DataType::QSymmS8; + constexpr DataType biasType = DataType::Signed32; + + constexpr float perTensorScale = 1.5f; + const TensorInfo inputInfo (inputShape, inputType, perTensorScale); + const TensorInfo outputInfo(outputShape, inputType, perTensorScale); + + const std::vector weightPerAxisScales = { 2.50f, 3.50f }; + const TensorInfo weightInfo(weightShape, weightType, weightPerAxisScales, 0); + + Convolution2dQueueDescriptor queueDescriptor; + queueDescriptor.m_Parameters.m_BiasEnabled = true; + + WorkloadInfo workloadInfo; + AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, nullptr); + AddInputToWorkload(queueDescriptor, workloadInfo, weightInfo, nullptr); + AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, nullptr); + + ScopedTensorHandle weightTensor(weightInfo); + queueDescriptor.m_Weight = &weightTensor; + // Test 3: mismatched number of quantization scales const std::vector biasPerAxisScales3 = { 3.75f, 5.25f, 5.25f }; const TensorInfo biasInfo3(biasShape, biasType, biasPerAxisScales3, 0); @@ -705,7 +791,10 @@ TEST_CASE("BiasPerAxisQuantization_Validate") ScopedTensorHandle biasHandle3(biasInfo3); queueDescriptor.m_Bias = &biasHandle3; + AddInputToWorkload(queueDescriptor, workloadInfo, biasInfo3, nullptr); + CHECK_THROWS_AS(queueDescriptor.Validate(workloadInfo), InvalidArgumentException); } + } diff --git a/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp index 74c65e271c..1e0adc169a 100644 --- a/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp +++ b/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp @@ -309,6 +309,7 @@ LayerTestResult SimpleConvolution2dTestImpl( std::unique_ptr inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo); std::unique_ptr outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo); + std::unique_ptr weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc); armnn::Convolution2dQueueDescriptor data; armnn::WorkloadInfo info; @@ -329,8 +330,15 @@ LayerTestResult SimpleConvolution2dTestImpl( } AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddInputToWorkload(data, info, kernelDesc, weightsHandle.get()); AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + std::unique_ptr biasHandle = nullptr; + if (biasEnabled) + { + biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc); + AddInputToWorkload(data, info, biasDesc, biasHandle.get()); + } data.m_Weight = &weightsTensor; data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs. data.m_Parameters.m_StrideX = strideX; @@ -349,8 +357,16 @@ LayerTestResult SimpleConvolution2dTestImpl( info); inputHandle->Allocate(); outputHandle->Allocate(); + weightsHandle->Allocate(); + + if (biasEnabled) + { + biasHandle->Allocate(); + CopyDataToITensorHandle(biasHandle.get(), bias.data()); + } CopyDataToITensorHandle(inputHandle.get(), inputData.data()); + CopyDataToITensorHandle(weightsHandle.get(), kernel.data()); ExecuteWorkload(*workload, memoryManager); @@ -423,6 +439,8 @@ LayerTestResult SimpleConvolution2dNhwcTestImpl( std::unique_ptr inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo); std::unique_ptr outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo); + std::unique_ptr weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc); + std::unique_ptr biasHandle = nullptr; armnn::ScopedTensorHandle weightsTensor(kernelDesc); AllocateAndCopyDataToITensorHandle(&weightsTensor, kernel.data()); @@ -444,15 +462,30 @@ LayerTestResult SimpleConvolution2dNhwcTestImpl( armnn::WorkloadInfo info; AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddInputToWorkload(data, info, kernelDesc, weightsHandle.get()); AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + if (biasEnabled) + { + biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc); + AddInputToWorkload(data, info, biasDesc, biasHandle.get()); + } + std::unique_ptr workload = workloadFactory.CreateWorkload(armnn::LayerType::Convolution2d, data, info); inputHandle->Allocate(); outputHandle->Allocate(); + weightsHandle->Allocate(); + + if (biasEnabled) + { + biasHandle->Allocate(); + CopyDataToITensorHandle(biasHandle.get(), bias.data()); + } CopyDataToITensorHandle(inputHandle.get(), inputData.data()); + CopyDataToITensorHandle(weightsHandle.get(), kernel.data()); ExecuteWorkload(*workload, memoryManager); @@ -552,35 +585,52 @@ LayerTestResult Convolution1dTestImpl( std::unique_ptr inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo); std::unique_ptr outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo); + std::unique_ptr weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelInfo); + std::unique_ptr biasHandle = nullptr; armnn::Convolution2dQueueDescriptor data; armnn::WorkloadInfo info; - armnn::ScopedTensorHandle weightsTensor(kernelInfo); - armnn::ScopedTensorHandle biasTensor(biasInfo); + armnn::ScopedTensorHandle weightsTensor(kernelInfo); + armnn::ScopedTensorHandle biasTensor(biasInfo); AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data()); AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data()); AddInputToWorkload(data, info, inputInfo, inputHandle.get()); + AddInputToWorkload(data, info, kernelInfo, weightsHandle.get()); AddOutputToWorkload(data, info, outputInfo, outputHandle.get()); - data.m_Weight = &weightsTensor; - data.m_Bias = &biasTensor; - data.m_Parameters.m_StrideX = 1; - data.m_Parameters.m_StrideY = stride; - data.m_Parameters.m_PadLeft = 0; - data.m_Parameters.m_PadRight = 0; - data.m_Parameters.m_PadTop = padSize; - data.m_Parameters.m_PadBottom = padSize; - data.m_Parameters.m_BiasEnabled = biasEnabled; + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; + data.m_Parameters.m_StrideX = 1; + data.m_Parameters.m_StrideY = stride; + data.m_Parameters.m_PadLeft = 0; + data.m_Parameters.m_PadRight = 0; + data.m_Parameters.m_PadTop = padSize; + data.m_Parameters.m_PadBottom = padSize; + data.m_Parameters.m_BiasEnabled = biasEnabled; + + if (biasEnabled) + { + biasHandle = tensorHandleFactory.CreateTensorHandle(biasInfo); + AddInputToWorkload(data, info, biasInfo, biasHandle.get()); + } std::unique_ptr workload = workloadFactory.CreateWorkload(armnn::LayerType::Convolution2d, data, info); inputHandle->Allocate(); outputHandle->Allocate(); + weightsHandle->Allocate(); + + if (biasEnabled) + { + biasHandle->Allocate(); + CopyDataToITensorHandle(biasHandle.get(), biasData.data()); + } CopyDataToITensorHandle(inputHandle.get(), inputData.data()); + CopyDataToITensorHandle(weightsHandle.get(), kernelData.data()); ExecuteWorkload(*workload, memoryManager); @@ -1364,18 +1414,30 @@ LayerTestResult CompareConvolution2dTestImpl( std::vector expectedOutput(outputTensorInfo.GetNumElements()); std::unique_ptr inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc); + std::unique_ptr weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc); std::unique_ptr outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo); armnn::Convolution2dQueueDescriptor data; armnn::WorkloadInfo info; + armnn::ScopedTensorHandle weightsTensor(kernelDesc); armnn::ScopedTensorHandle biasTensor(biasDesc); + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddInputToWorkload(data, info, kernelDesc, weightsHandle.get()); + AddInputToWorkload(data, info, biasDesc, biasHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + // AllocateAndCopyDataToITensorHandle() is required twice for the weights AND biases: + // See comment in DepthwiseConvolution2dAsymmetricTestImpl() for reasons. + // 1) ScopedTensorHandle (weightsTensor) required for QueueDescriptor (data.m_Weight). + // 2) ITensorHandle (converts to Backend TensorHandle) required in RefWorkload for GetTensorInfo() method. + AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernel.data()); AllocateAndCopyDataToITensorHandle(&weightsTensor, kernel.data()); + AllocateAndCopyDataToITensorHandle(biasHandle.get(), bias.data()); AllocateAndCopyDataToITensorHandle(&biasTensor, bias.data()); - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); data.m_Weight = &weightsTensor; data.m_Bias = &biasTensor; data.m_Parameters.m_StrideX = strideX; @@ -1387,11 +1449,15 @@ LayerTestResult CompareConvolution2dTestImpl( data.m_Parameters.m_BiasEnabled = true; std::unique_ptr outputHandleRef = refTensorHandleFactory.CreateTensorHandle(outputTensorInfo); + std::unique_ptr weightsHandleRef = refTensorHandleFactory.CreateTensorHandle(kernelDesc); + std::unique_ptr biasHandleRef = refTensorHandleFactory.CreateTensorHandle(biasDesc); std::unique_ptr inputHandleRef = refTensorHandleFactory.CreateTensorHandle(inputTensorInfo); armnn::Convolution2dQueueDescriptor refData = data; armnn::WorkloadInfo refInfo = info; SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadInput(refData, refInfo, 1, kernelDesc, weightsHandleRef.get()); + SetWorkloadInput(refData, refInfo, 2, biasDesc, biasHandleRef.get()); SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); std::unique_ptr workload @@ -1401,12 +1467,16 @@ LayerTestResult CompareConvolution2dTestImpl( outputHandleRef->Allocate(); inputHandleRef->Allocate(); + weightsHandleRef->Allocate(); + biasHandleRef->Allocate(); inputHandle->Allocate(); outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), input.data()); CopyDataToITensorHandle(inputHandleRef.get(), input.data()); + CopyDataToITensorHandle(weightsHandleRef.get(), kernel.data()); + CopyDataToITensorHandle(biasHandleRef.get(), bias.data()); ExecuteWorkload(*workload, memoryManager); @@ -3622,6 +3692,8 @@ LayerTestResult Convolution2dPerAxisQuantTest( std::unique_ptr inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo); std::unique_ptr outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo); + std::unique_ptr weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelInfo); + std::unique_ptr biasHandle = nullptr; WorkloadInfo workloadInfo; ScopedTensorHandle weightTensor(kernelInfo); @@ -3636,6 +3708,14 @@ LayerTestResult Convolution2dPerAxisQuantTest( queueDescriptor.m_Bias = &biasTensor; AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get()); + AddInputToWorkload(queueDescriptor, workloadInfo, kernelInfo, weightsHandle.get()); + + if (descriptor.m_BiasEnabled) + { + biasHandle = tensorHandleFactory.CreateTensorHandle(biasInfo); + AddInputToWorkload(queueDescriptor, workloadInfo, biasInfo, biasHandle.get()); + } + AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get()); std::unique_ptr workload= workloadFactory.CreateWorkload(armnn::LayerType::Convolution2d, @@ -3643,8 +3723,16 @@ LayerTestResult Convolution2dPerAxisQuantTest( workloadInfo); inputHandle->Allocate(); outputHandle->Allocate(); + weightsHandle->Allocate(); + if (descriptor.m_BiasEnabled) + { + biasHandle->Allocate(); + CopyDataToITensorHandle(biasHandle.get(), biasData.data()); + } CopyDataToITensorHandle(inputHandle.get(), inputData.data()); + CopyDataToITensorHandle(weightsHandle.get(), kernelData.data()); + ExecuteWorkload(*workload, memoryManager); diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp index 47990d87dc..bd1b94e79f 100644 --- a/src/backends/cl/ClBackend.cpp +++ b/src/backends/cl/ClBackend.cpp @@ -341,14 +341,14 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph, if (baseLayer->GetParameters().m_BiasEnabled) { - biases = baseLayer->m_Bias->GetTensorInfo(); + biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo(); } arm_compute::Status status = ClConvolution2dWorkloadValidate( baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), baseLayer->GetParameters(), - baseLayer->m_Weight->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), biases, isFastMathEnabled, &activationDesc); diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp index 4f53b921d0..3a757f8820 100644 --- a/src/backends/cl/test/ClCreateWorkloadTests.cpp +++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp @@ -471,6 +471,8 @@ TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dClCompiledContext auto tensorHandleFactory = ClWorkloadFactoryHelper::GetTensorHandleFactory(memoryManager); std::unique_ptr inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo); + std::unique_ptr weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelInfo); + std::unique_ptr biasHandle = tensorHandleFactory.CreateTensorHandle(biasInfo); std::unique_ptr outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo); @@ -487,6 +489,8 @@ TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dClCompiledContext queueDescriptor.m_Bias = &biasTensor; AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get()); + AddInputToWorkload(queueDescriptor, workloadInfo, kernelInfo, weightsHandle.get()); + AddInputToWorkload(queueDescriptor, workloadInfo, biasInfo, biasHandle.get()); AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get()); // Initialize our m_CLCompileContext using default device and context diff --git a/src/backends/cl/test/ClImportTensorHandleTests.cpp b/src/backends/cl/test/ClImportTensorHandleTests.cpp index e10e81ac26..9bfd1fb46d 100644 --- a/src/backends/cl/test/ClImportTensorHandleTests.cpp +++ b/src/backends/cl/test/ClImportTensorHandleTests.cpp @@ -11,7 +11,6 @@ #include - #include #include #include "Network.hpp" @@ -320,10 +319,13 @@ TEST_CASE_FIXTURE(ClContextControlFixture, "ClForceImportConv2dEndToEnd") convDesc2d.m_PadTop = 1; convDesc2d.m_PadBottom = 1; convDesc2d.m_DataLayout = DataLayout::NHWC; + + ARMNN_NO_DEPRECATE_WARN_BEGIN armnn::IConnectableLayer* const convLayer = network->AddConvolution2dLayer(convDesc2d, weights, armnn::EmptyOptional(), "conv"); + ARMNN_NO_DEPRECATE_WARN_END ARMNN_ASSERT(convLayer); inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0)); @@ -876,10 +878,12 @@ TEST_CASE_FIXTURE(ClContextControlFixture, "ClForceImportRepeatedInferencesEndTo convDesc2d.m_PadTop = 1; convDesc2d.m_PadBottom = 1; convDesc2d.m_DataLayout = DataLayout::NHWC; + ARMNN_NO_DEPRECATE_WARN_BEGIN armnn::IConnectableLayer* const convLayer = network->AddConvolution2dLayer(convDesc2d, weights, armnn::EmptyOptional(), "conv"); + ARMNN_NO_DEPRECATE_WARN_END ARMNN_ASSERT(convLayer); inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0)); @@ -1094,10 +1098,12 @@ TEST_CASE_FIXTURE(ClContextControlFixture, "ClForceImportRepeatedInferencesInver convDesc2d.m_PadTop = 1; convDesc2d.m_PadBottom = 1; convDesc2d.m_DataLayout = DataLayout::NHWC; + ARMNN_NO_DEPRECATE_WARN_BEGIN armnn::IConnectableLayer* const convLayer = network->AddConvolution2dLayer(convDesc2d, weights, armnn::EmptyOptional(), "conv"); + ARMNN_NO_DEPRECATE_WARN_END ARMNN_ASSERT(convLayer); inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0)); diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp index bf82fbf255..e3d679a773 100644 --- a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp +++ b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp @@ -28,6 +28,15 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, bool isFastMathEnabled, const ActivationDescriptor* activationDescriptor) { + // The implemented workload does support both const and non const + // weights. However, in the case of non const weights we'd have to call + // prepare or configure for each inference which we're not setup to do just yet. + if (!weights.IsConstant()) + { + return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, + "ArmNN ClConvolution2dWorkload does not support non constant weights."}; + } + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); @@ -41,7 +50,12 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, if (descriptor.m_BiasEnabled) { ARMNN_ASSERT(biases.has_value()); - + // Same for bias as weights. We don't currently support non const. + if (!biases.value().IsConstant()) + { + return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, + "ArmNN ClConvolution2dWorkload does not support non constant bias."}; + } aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout); optionalAclBiasesInfo = &aclBiasesInfo; } @@ -72,6 +86,7 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip { ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClConvolution2dWorkload"); const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + m_Data.ValidateInputsOutputs("ClConvolution2dWorkload", 1, 1); m_KernelTensor = std::make_unique(); BuildArmComputeTensor(*m_KernelTensor, weightInfo, m_Data.m_Parameters.m_DataLayout); @@ -85,8 +100,6 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), m_Data.m_Parameters.m_DataLayout); } - m_Data.ValidateInputsOutputs("ClConvolution2dWorkload", 1, 1); - arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp index 39ad4b9f32..24336426ea 100644 --- a/src/backends/neon/NeonBackend.cpp +++ b/src/backends/neon/NeonBackend.cpp @@ -193,14 +193,14 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph if (baseLayer->GetParameters().m_BiasEnabled) { - biases = baseLayer->m_Bias->GetTensorInfo(); + biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo(); } arm_compute::Status status = NeonConvolution2dWorkloadValidate( baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), baseLayer->GetParameters(), - baseLayer->m_Weight->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), biases, false, &activationDesc); diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp index fce57e62a8..d5716c8014 100644 --- a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp +++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp @@ -29,6 +29,15 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input, bool isFastMathEnabled, const ActivationDescriptor* activationDescriptor) { + // The implemented workload does support both const and non const + // weights. However, in the case of non const weights we'd have to call + // prepare or configure for each inference which we're not setup to do just yet. + if (!weights.IsConstant()) + { + return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, + "ArmNN NeonConvolution2dWorkload does not support non constant weights."}; + } + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); @@ -42,7 +51,12 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input, if (descriptor.m_BiasEnabled) { ARMNN_ASSERT(biases.has_value()); - + // Same for bias as weights. We don't currently support non const. + if (!biases.value().IsConstant()) + { + return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, + "ArmNN NeonConvolution2dWorkload does not support non constant bias."}; + } aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout); optionalAclBiasesInfo = &aclBiasesInfo; } diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp index f921383183..8051dcffa0 100644 --- a/src/backends/reference/RefLayerSupport.cpp +++ b/src/backends/reference/RefLayerSupport.cpp @@ -1246,11 +1246,12 @@ bool RefLayerSupport::IsDequantizeSupported(const TensorInfo& input, { bool supported = true; - std::array supportedInputTypes = { + std::array supportedInputTypes = { DataType::QAsymmS8, DataType::QAsymmU8, DataType::QSymmS8, - DataType::QSymmS16 + DataType::QSymmS16, + DataType::Float16 }; supported &= CheckSupportRule(TypeAnyOf(input, supportedInputTypes), reasonIfUnsupported, diff --git a/src/backends/reference/workloads/RefConvolution2dWorkload.cpp b/src/backends/reference/workloads/RefConvolution2dWorkload.cpp index d57040eaec..fe97cb1066 100644 --- a/src/backends/reference/workloads/RefConvolution2dWorkload.cpp +++ b/src/backends/reference/workloads/RefConvolution2dWorkload.cpp @@ -12,37 +12,46 @@ namespace armnn { -RefConvolution2dWorkload::RefConvolution2dWorkload( - const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) +RefConvolution2dWorkload::RefConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) : RefBaseWorkload(descriptor, info) { WorkloadInfo detailsInfo; detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos; detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos; - detailsInfo.m_WeightsTensorInfo = armnn::Optional(descriptor.m_Weight->GetTensorInfo()); - if (descriptor.m_Parameters.m_BiasEnabled) - { - detailsInfo.m_BiasTensorInfo = armnn::Optional(descriptor.m_Bias->GetTensorInfo()); - } // Report Profiling Details ARMNN_REPORT_PROFILING_WORKLOAD_DESC("RefConvolution2dWorkload_Construct", descriptor.m_Parameters, detailsInfo, this->GetGuid()); +} - m_Weight = std::make_unique(*( descriptor.m_Weight )); - const TensorInfo& rFilterInfo = m_Weight->GetTensorInfo(); +void RefConvolution2dWorkload::PostAllocationConfigure() +{ + PostAllocationConfigure(m_Data.m_Inputs, m_Data.m_Outputs); +} +void RefConvolution2dWorkload::PostAllocationConfigure(std::vector inputs, + std::vector outputs) +{ + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + ARMNN_ASSERT(inputInfo.GetNumDimensions() > 1); + m_InputShape = inputInfo.GetShape(); + + const TensorInfo& rFilterInfo = GetTensorInfo(inputs[1]); + ARMNN_ASSERT(inputInfo.GetNumDimensions() > 1); m_FilterShape = rFilterInfo.GetShape(); - m_FilterDecoder = MakeDecoder(rFilterInfo, m_Weight.get()->Map(true)); + m_FilterDecoder = MakeDecoder(rFilterInfo); - if ( descriptor.m_Parameters.m_BiasEnabled ) + if (m_Data.m_Parameters.m_BiasEnabled) { - m_Bias = std::make_unique(*( descriptor.m_Bias )); - const TensorInfo& biasInfo = m_Bias->GetTensorInfo(); - m_BiasDecoder = MakeDecoder(biasInfo, m_Bias->Map(true)); + const TensorInfo& biasInfo = GetTensorInfo(inputs[2]); + m_BiasDecoder = MakeDecoder(biasInfo); } + + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); + m_OutputShape = outputInfo.GetShape(); } void RefConvolution2dWorkload::Execute() const @@ -52,6 +61,8 @@ void RefConvolution2dWorkload::Execute() const void RefConvolution2dWorkload::ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) { + PostAllocationConfigure(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); } @@ -62,14 +73,17 @@ void RefConvolution2dWorkload::Execute(std::vector inputs, std:: std::unique_ptr> inputDecoder = MakeDecoder(GetTensorInfo(inputs[0]), inputs[0]->Map()); std::unique_ptr> outputEncoder = MakeEncoder(GetTensorInfo(outputs[0]), outputs[0]->Map()); - const TensorShape& inputShape = GetTensorInfo(inputs[0]).GetShape(); - const TensorShape& outputShape = GetTensorInfo(outputs[0]).GetShape(); + m_FilterDecoder->Reset(inputs[1]->Map()); + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasDecoder->Reset(inputs[2]->Map()); + } - Convolve(inputShape, *inputDecoder, outputShape, *outputEncoder, m_FilterShape, + Convolve(m_InputShape, *inputDecoder, m_OutputShape, *outputEncoder, m_FilterShape, *m_FilterDecoder, m_Data.m_Parameters.m_BiasEnabled, m_BiasDecoder.get(), m_Data.m_Parameters.m_DataLayout, m_Data.m_Parameters.m_PadTop, m_Data.m_Parameters.m_PadLeft, m_Data.m_Parameters.m_StrideX, m_Data.m_Parameters.m_StrideY, m_Data.m_Parameters.m_DilationX, m_Data.m_Parameters.m_DilationY); } -} //namespace armnn +} //namespace armnn \ No newline at end of file diff --git a/src/backends/reference/workloads/RefConvolution2dWorkload.hpp b/src/backends/reference/workloads/RefConvolution2dWorkload.hpp index 3335782f78..1cb30b6890 100644 --- a/src/backends/reference/workloads/RefConvolution2dWorkload.hpp +++ b/src/backends/reference/workloads/RefConvolution2dWorkload.hpp @@ -19,20 +19,21 @@ public: explicit RefConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info); + void PostAllocationConfigure() override; void Execute() const override; void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void PostAllocationConfigure(std::vector inputs, std::vector outputs); void Execute(std::vector inputs, std::vector outputs) const; - std::unique_ptr m_Weight; - std::unique_ptr m_Bias; std::unique_ptr> m_FilterDecoder; std::unique_ptr> m_BiasDecoder; + TensorShape m_InputShape; + TensorShape m_OutputShape; TensorShape m_FilterShape; }; -} //namespace armnn - +} //namespace armnn \ No newline at end of file diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp index ef0b16d1cd..30ee6d8ace 100644 --- a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp +++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp @@ -22,13 +22,6 @@ public: private: void Execute(std::vector inputs, std::vector outputs) const; - std::unique_ptr m_Weight; - std::unique_ptr m_Bias; - - std::unique_ptr > m_FilterDecoder; - std::unique_ptr > m_BiasDecoder; - - TensorShape m_FilterShape; }; } //namespace armnn diff --git a/src/profiling/test/ProfilingTestUtils.cpp b/src/profiling/test/ProfilingTestUtils.cpp index 0159f61140..58708cab9a 100644 --- a/src/profiling/test/ProfilingTestUtils.cpp +++ b/src/profiling/test/ProfilingTestUtils.cpp @@ -424,8 +424,9 @@ void VerifyPostOptimisationStructureTestImpl(armnn::BackendId backendId) conv2dDesc.m_PadTop = 2; conv2dDesc.m_PadBottom = 2; conv2dDesc.m_BiasEnabled = true; + ARMNN_NO_DEPRECATE_WARN_BEGIN IConnectableLayer* conv2d = net->AddConvolution2dLayer(conv2dDesc, weights, optionalBiases); - + ARMNN_NO_DEPRECATE_WARN_END // Abs layer armnn::ElementwiseUnaryDescriptor absDesc; armnn::IConnectableLayer* const abs = net->AddElementwiseUnaryLayer(absDesc, "abs"); @@ -515,7 +516,6 @@ void VerifyPostOptimisationStructureTestImpl(armnn::BackendId backendId) // Input layer // Input layer entity VerifyTimelineEntityBinaryPacketData(input->GetGuid(), readableData, offset); - // Name Entity ProfilingGuid inputLabelGuid = VerifyTimelineLabelBinaryPacketData( arm::pipe::EmptyOptional(), "input", readableData, offset); @@ -547,6 +547,159 @@ void VerifyPostOptimisationStructureTestImpl(armnn::BackendId backendId) readableData, offset); + // Weights layer + // We will not check the GUID from the packets since we haven't direct access to the layer + // The GUID will change depending on the number of tests ran since we do are not explicitly resetting the + // ProfilingGuid counter at the beginning of this test + + + // Weights layer entity + VerifyTimelineEntityBinaryPacketData( arm::pipe::EmptyOptional(), readableData, offset); + + // Name entity + ProfilingGuid weightsNameLabelGuid = VerifyTimelineLabelBinaryPacketData( + arm::pipe::EmptyOptional(), "Weights", readableData, offset); + + // Entity - Name relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::LabelLink, + arm::pipe::EmptyOptional(), + arm::pipe::EmptyOptional(), + weightsNameLabelGuid, + LabelsAndEventClasses::NAME_GUID, + readableData, + offset); + + // Entity - Type relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::LabelLink, + arm::pipe::EmptyOptional(), + arm::pipe::EmptyOptional(), + LabelsAndEventClasses::LAYER_GUID, + LabelsAndEventClasses::TYPE_GUID, + readableData, + offset); + + // Network - Weights layer relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::RetentionLink, + arm::pipe::EmptyOptional(), + optNetGuid, + arm::pipe::EmptyOptional(), + LabelsAndEventClasses::CHILD_GUID, + readableData, + offset); + + // Weights workload + // Weights workload entity + ProfilingGuid weightsWorkloadGuid = VerifyTimelineEntityBinaryPacketData( + arm::pipe::EmptyOptional(), readableData, offset); + + // Entity - Type relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::LabelLink, + arm::pipe::EmptyOptional(), + weightsWorkloadGuid, + LabelsAndEventClasses::WORKLOAD_GUID, + LabelsAndEventClasses::TYPE_GUID, + readableData, + offset); + + // BackendId entity + ProfilingGuid backendIdLabelGuid = VerifyTimelineLabelBinaryPacketData( + arm::pipe::EmptyOptional(), backendId.Get(), readableData, offset); + + // Entity - BackendId relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::LabelLink, + arm::pipe::EmptyOptional(), + weightsWorkloadGuid, + backendIdLabelGuid, + LabelsAndEventClasses::BACKENDID_GUID, + readableData, + offset); + + + // Weights layer - Weights workload relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::RetentionLink, + arm::pipe::EmptyOptional(), + arm::pipe::EmptyOptional(), + weightsWorkloadGuid, + LabelsAndEventClasses::CHILD_GUID, + readableData, + offset); + + // Bias layer + // We will not check the GUID from the packets since we haven't direct access to the layer + // The GUID will change depending on the number of tests ran since we do are not explicitly resetting the + // ProfilingGuid counter at the beginning of this test + + // Bias layer entity + VerifyTimelineEntityBinaryPacketData(arm::pipe::EmptyOptional(), readableData, offset); + + // Name entity + ProfilingGuid biasNameLabelGuid = VerifyTimelineLabelBinaryPacketData( + arm::pipe::EmptyOptional(), "Bias", readableData, offset); + + // Entity - Name relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::LabelLink, + arm::pipe::EmptyOptional(), + arm::pipe::EmptyOptional(), + biasNameLabelGuid, + LabelsAndEventClasses::NAME_GUID, + readableData, + offset); + + // Entity - Type relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::LabelLink, + arm::pipe::EmptyOptional(), + arm::pipe::EmptyOptional(), + LabelsAndEventClasses::LAYER_GUID, + LabelsAndEventClasses::TYPE_GUID, + readableData, + offset); + + // Network - Bias layer relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::RetentionLink, + arm::pipe::EmptyOptional(), + optNetGuid, + arm::pipe::EmptyOptional(), + LabelsAndEventClasses::CHILD_GUID, + readableData, + offset); + + // Bias workload + // Bias workload entity + ProfilingGuid biasWorkloadGuid = VerifyTimelineEntityBinaryPacketData( + arm::pipe::EmptyOptional(), readableData, offset); + + // Entity - Type relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::LabelLink, + arm::pipe::EmptyOptional(), + biasWorkloadGuid, + LabelsAndEventClasses::WORKLOAD_GUID, + LabelsAndEventClasses::TYPE_GUID, + readableData, + offset); + + // BackendId entity + backendIdLabelGuid = VerifyTimelineLabelBinaryPacketData( + arm::pipe::EmptyOptional(), backendId.Get(), readableData, offset); + + // Entity - BackendId relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::LabelLink, + arm::pipe::EmptyOptional(), + biasWorkloadGuid, + backendIdLabelGuid, + LabelsAndEventClasses::BACKENDID_GUID, + readableData, + offset); + + + // Bias layer - Bias workload relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::RetentionLink, + arm::pipe::EmptyOptional(), + arm::pipe::EmptyOptional(), + biasWorkloadGuid, + LabelsAndEventClasses::CHILD_GUID, + readableData, + offset); + // Conv2d layer // Conv2d layer entity VerifyTimelineEntityBinaryPacketData(conv2d->GetGuid(), readableData, offset); @@ -591,6 +744,24 @@ void VerifyPostOptimisationStructureTestImpl(armnn::BackendId backendId) readableData, offset); + // Weights layer - Conv2d layer relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::RetentionLink, + arm::pipe::EmptyOptional(), + arm::pipe::EmptyOptional(), + conv2d->GetGuid(), + LabelsAndEventClasses::CONNECTION_GUID, + readableData, + offset); + + // Bias layer - Conv2d layer relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::RetentionLink, + arm::pipe::EmptyOptional(), + arm::pipe::EmptyOptional(), + conv2d->GetGuid(), + LabelsAndEventClasses::CONNECTION_GUID, + readableData, + offset); + // Conv2d workload // Conv2d workload entity ProfilingGuid conv2DWorkloadGuid = VerifyTimelineEntityBinaryPacketData( @@ -606,7 +777,7 @@ void VerifyPostOptimisationStructureTestImpl(armnn::BackendId backendId) offset); // BackendId entity - ProfilingGuid backendIdLabelGuid = VerifyTimelineLabelBinaryPacketData( + backendIdLabelGuid = VerifyTimelineLabelBinaryPacketData( arm::pipe::EmptyOptional(), backendId.Get(), readableData, offset); // Entity - BackendId relationship @@ -884,7 +1055,7 @@ void VerifyPostOptimisationStructureTestImpl(armnn::BackendId backendId) // Validate inference data size = inferenceReadableBuffer->GetSize(); - CHECK(size == 1228 + 10 * ThreadIdSize); + CHECK(size == 1748 + 10 * ThreadIdSize); readableData = inferenceReadableBuffer->GetReadableData(); CHECK(readableData != nullptr); @@ -892,7 +1063,7 @@ void VerifyPostOptimisationStructureTestImpl(armnn::BackendId backendId) offset = 0; // Verify Header - VerifyTimelineHeaderBinary(readableData, offset, 1220 + 10 * ThreadIdSize); + VerifyTimelineHeaderBinary(readableData, offset, 1740 + 10 * ThreadIdSize); // Inference timeline trace // Inference entity @@ -992,6 +1163,126 @@ void VerifyPostOptimisationStructureTestImpl(armnn::BackendId backendId) readableData, offset); + // Weights workload execution + // Weights workload execution entity + ProfilingGuid weightsWorkloadExecutionGuid = VerifyTimelineEntityBinaryPacketData( + arm::pipe::EmptyOptional(), readableData, offset); + + // Entity - Type relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::LabelLink, + arm::pipe::EmptyOptional(), + weightsWorkloadExecutionGuid, + LabelsAndEventClasses::WORKLOAD_EXECUTION_GUID, + LabelsAndEventClasses::TYPE_GUID, + readableData, + offset); + + // Inference - Workload execution relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::RetentionLink, + arm::pipe::EmptyOptional(), + inferenceGuid, + weightsWorkloadExecutionGuid, + LabelsAndEventClasses::CHILD_GUID, + readableData, + offset); + + // Workload - Workload execution relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::RetentionLink, + arm::pipe::EmptyOptional(), + weightsWorkloadGuid, + weightsWorkloadExecutionGuid, + LabelsAndEventClasses::EXECUTION_OF_GUID, + readableData, + offset); + + // Start Weights workload execution life + // Event packet - timeline, threadId, eventGuid + ProfilingGuid weightsWorkloadExecutionSOLEventGuid = VerifyTimelineEventBinaryPacket( + arm::pipe::EmptyOptional(), arm::pipe::EmptyOptional(), arm::pipe::EmptyOptional(), readableData, offset); + + // Weights workload execution - event relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::ExecutionLink, + arm::pipe::EmptyOptional(), + weightsWorkloadExecutionGuid, + weightsWorkloadExecutionSOLEventGuid, + LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS, + readableData, + offset); + + // End of Weights workload execution life + // Event packet - timeline, threadId, eventGuid + ProfilingGuid weightsWorkloadExecutionEOLEventGuid = VerifyTimelineEventBinaryPacket( + arm::pipe::EmptyOptional(), arm::pipe::EmptyOptional(), arm::pipe::EmptyOptional(), readableData, offset); + + // Weights workload execution - event relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::ExecutionLink, + arm::pipe::EmptyOptional(), + weightsWorkloadExecutionGuid, + weightsWorkloadExecutionEOLEventGuid, + LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS, + readableData, + offset); + + // Bias workload execution + // Bias workload execution entity + ProfilingGuid biasWorkloadExecutionGuid = VerifyTimelineEntityBinaryPacketData( + arm::pipe::EmptyOptional(), readableData, offset); + + // Entity - Type relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::LabelLink, + arm::pipe::EmptyOptional(), + biasWorkloadExecutionGuid, + LabelsAndEventClasses::WORKLOAD_EXECUTION_GUID, + LabelsAndEventClasses::TYPE_GUID, + readableData, + offset); + + // Inference - Workload execution relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::RetentionLink, + arm::pipe::EmptyOptional(), + inferenceGuid, + biasWorkloadExecutionGuid, + LabelsAndEventClasses::CHILD_GUID, + readableData, + offset); + + // Workload - Workload execution relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::RetentionLink, + arm::pipe::EmptyOptional(), + biasWorkloadGuid, + biasWorkloadExecutionGuid, + LabelsAndEventClasses::EXECUTION_OF_GUID, + readableData, + offset); + + // Start Bias workload execution life + // Event packet - timeline, threadId, eventGuid + ProfilingGuid biasWorkloadExecutionSOLEventGuid = VerifyTimelineEventBinaryPacket( + arm::pipe::EmptyOptional(), arm::pipe::EmptyOptional(), arm::pipe::EmptyOptional(), readableData, offset); + + // Bias workload execution - event relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::ExecutionLink, + arm::pipe::EmptyOptional(), + biasWorkloadExecutionGuid, + biasWorkloadExecutionSOLEventGuid, + LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS, + readableData, + offset); + + // End of Bias workload execution life + // Event packet - timeline, threadId, eventGuid + ProfilingGuid biasWorkloadExecutionEOLEventGuid = VerifyTimelineEventBinaryPacket( + arm::pipe::EmptyOptional(), arm::pipe::EmptyOptional(), arm::pipe::EmptyOptional(), readableData, offset); + + // Bias workload execution - event relationship + VerifyTimelineRelationshipBinaryPacketData(ProfilingRelationshipType::ExecutionLink, + arm::pipe::EmptyOptional(), + biasWorkloadExecutionGuid, + biasWorkloadExecutionEOLEventGuid, + LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS, + readableData, + offset); + // Conv2d workload execution // Conv2d workload execution entity ProfilingGuid conv2DWorkloadExecutionGuid = VerifyTimelineEntityBinaryPacketData( -- cgit v1.2.1