From 76615a5edd55b890acdd5fb078d9242e1e719a45 Mon Sep 17 00:00:00 2001
From: Sadik Armagan
Date: Tue, 4 Aug 2020 14:01:05 +0100
Subject: IVGCVSW-5108 Allow Concat to use subtensor on x and y

* Updated ConcatLayer to allow using subtensors on x/y if padding is not required

Signed-off-by: Sadik Armagan
Change-Id: I46a8fb9f17b976b76e069bb82614b6628a206717
---
 src/armnn/layers/ConcatLayer.cpp                 | 45 +++++++++++--
 src/armnn/layers/ConcatLayer.hpp                 |  2 +-
 src/backends/neon/NeonTensorHandleFactory.cpp    | 11 ----
 src/backends/neon/test/NeonTensorHandleTests.cpp | 83 ++++++++++++++++++++++++
 4 files changed, 125 insertions(+), 16 deletions(-)

diff --git a/src/armnn/layers/ConcatLayer.cpp b/src/armnn/layers/ConcatLayer.cpp
index d9fffff57e..fac6a1f197 100644
--- a/src/armnn/layers/ConcatLayer.cpp
+++ b/src/armnn/layers/ConcatLayer.cpp
@@ -36,7 +36,7 @@ std::unique_ptr<IWorkload> ConcatLayer::CreateWorkload(const IWorkloadFactory& f
 }
 
 template<typename FactoryType>
-void ConcatLayer::CreateTensors(const FactoryType& factory)
+void ConcatLayer::CreateTensors(const TensorHandleFactoryRegistry& registry, const FactoryType& factory)
 {
     //If sub tensors are supported then the concat
     //just needs to make sure that the outputs of the prev layer
@@ -45,6 +45,12 @@ void ConcatLayer::CreateTensors(const FactoryType& factory)
 
     if (factory.SupportsSubTensors())
     {
+        // check if concat is along the x or y (2 innermost dimensions)
+        uint32_t concatAxis = m_Param.GetConcatAxis();
+        auto numberOfDimensions = m_Param.GetNumDimensions();
+        bool isConcatOnXorY = m_Param.GetNumDimensions() >= 3
+            && ((concatAxis == numberOfDimensions - 1) || (concatAxis == numberOfDimensions - 2));
+
         ITensorHandleFactory::FactoryId factoryId = GetOutputSlot(0).GetTensorHandleFactoryId();
 
         std::queue<ConcatLayer*> m_ConcatLayers;
@@ -59,6 +65,35 @@ void ConcatLayer::CreateTensors(const FactoryType& factory)
 
             const unsigned int numInputSlots = currentLayer->GetNumInputSlots();
 
+            // if concat along x or y (2 innermost dimensions) and the previous layers do not require padding
+            bool canUseSubTensorOnXorY = true;
+            bool isTensorHandleFactory = std::is_same<armnn::ITensorHandleFactory, FactoryType>::value;
+            if (isTensorHandleFactory)
+            {
+                for (unsigned int i = 0; i < numInputSlots; ++i)
+                {
+                    OutputSlot* slot = currentLayer->GetInputSlot(i).GetConnectedOutputSlot();
+                    ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId);
+                    std::vector<Capability> capabilities =
+                        handleFactory->GetCapabilities(&(slot->GetOwningLayer()),
+                                                       currentLayer,
+                                                       CapabilityClass::PaddingRequired);
+                    if (isConcatOnXorY)
+                    {
+                        canUseSubTensorOnXorY = false;
+                        if (capabilities.empty())
+                        {
+                            canUseSubTensorOnXorY = true;
+                        }
+                    }
+
+                    if (!canUseSubTensorOnXorY)
+                    {
+                        break;
+                    }
+                }
+            }
+
             // First go through all the input slots and verify that we can sub-tensor all the inputs.
             std::vector<std::unique_ptr<ITensorHandle>> subTensors(0);
             subTensors.reserve(numInputSlots);
@@ -74,12 +109,14 @@ void ConcatLayer::CreateTensors(const FactoryType& factory)
                     // 2) the same TensorHandleFactory is used for input and Concat layer output
                     // 3) the input does not come from a Constant layer or input layer
                     // 4) the input is only read by this concat layer
+                    // 5) if concat along x or y (2 innermost dimensions) and the previous layers do not require padding
                     if (slot &&
                         parentInfo.IsTypeSpaceMatch(info) && //(1)
                         factoryId == slot->GetTensorHandleFactoryId() && //(2)
                         slot->GetOwningLayer().GetType() != LayerType::Constant && //(3)
                         slot->GetOwningLayer().GetType() != LayerType::Input && //(3)
-                        slot->GetNumConnections() == 1) //(4)
+                        slot->GetNumConnections() == 1 &&
+                        canUseSubTensorOnXorY) //(5)
                     {
                         return factory.CreateSubTensorHandle(*parentTensor,
                                                              info.GetShape(),
@@ -137,13 +174,13 @@ void ConcatLayer::CreateTensorHandles(const TensorHandleFactoryRegistry& registr
 
     if (factoryId == ITensorHandleFactory::LegacyFactoryId)
     {
-        CreateTensors(workloadFactory);
+        CreateTensors(registry, workloadFactory);
     }
     else
    {
         ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId);
         ARMNN_ASSERT(handleFactory);
-        CreateTensors(*handleFactory);
+        CreateTensors(registry, *handleFactory);
     }
 }
 
diff --git a/src/armnn/layers/ConcatLayer.hpp b/src/armnn/layers/ConcatLayer.hpp
index 84eba2e7c9..eaa5c15a9c 100644
--- a/src/armnn/layers/ConcatLayer.hpp
+++ b/src/armnn/layers/ConcatLayer.hpp
@@ -56,7 +56,7 @@ protected:
 
 private:
     template <typename FactoryType>
-    void CreateTensors(const FactoryType& factory);
+    void CreateTensors(const TensorHandleFactoryRegistry& registry, const FactoryType& factory);
 };
 
diff --git a/src/backends/neon/NeonTensorHandleFactory.cpp b/src/backends/neon/NeonTensorHandleFactory.cpp
index 4e013a37a1..53d5a04b32 100644
--- a/src/backends/neon/NeonTensorHandleFactory.cpp
+++ b/src/backends/neon/NeonTensorHandleFactory.cpp
@@ -34,17 +34,6 @@ std::unique_ptr<ITensorHandle> NeonTensorHandleFactory::CreateSubTensorHandle(IT
     const arm_compute::TensorShape parentShape = armcomputetensorutils::BuildArmComputeTensorShape(parent.GetShape());
 
-    // In order for ACL to support subtensors the concat axis cannot be on x or y and the values of x and y
-    // must match the parent shapes
-    if (coords.x() != 0 || coords.y() != 0)
-    {
-        return nullptr;
-    }
-    if ((parentShape.x() != shape.x()) || (parentShape.y() != shape.y()))
-    {
-        return nullptr;
-    }
-
     if (!::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, parentShape, coords, shape))
     {
         return nullptr;
diff --git a/src/backends/neon/test/NeonTensorHandleTests.cpp b/src/backends/neon/test/NeonTensorHandleTests.cpp
index fe5e8f9fb3..8b3e3fdc99 100644
--- a/src/backends/neon/test/NeonTensorHandleTests.cpp
+++ b/src/backends/neon/test/NeonTensorHandleTests.cpp
@@ -2,9 +2,17 @@
 // Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
+
+#include <Graph.hpp>
+#include <Network.hpp>
+
 #include <neon/NeonTensorHandle.hpp>
 #include <neon/NeonTensorHandleFactory.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
+#include <test/GraphUtils.hpp>
+
 #include <boost/test/unit_test.hpp>
 
 BOOST_AUTO_TEST_SUITE(NeonTensorHandleTests)
@@ -77,4 +85,79 @@ BOOST_AUTO_TEST_CASE(NeonTensorHandleGetCapabilitiesPadding)
     BOOST_TEST(capabilities[0].m_Value);
 }
 
+BOOST_AUTO_TEST_CASE(ConcatOnXorYSubTensorsNoPaddinRequiredTest)
+{
+    armnn::INetworkPtr net(armnn::INetwork::Create());
+
+    // Set up tensor infos
+    const armnn::TensorInfo inputInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
+    const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
+    const armnn::TensorInfo outputInfo = armnn::TensorInfo({2, 3, 4, 2}, armnn::DataType::Float32);
+
+    armnn::ElementwiseUnaryDescriptor descriptor(armnn::UnaryOperation::Abs);
+
+    // Create the network
+    armnn::IConnectableLayer* const input0Layer = net->AddInputLayer(0, "input_0");
+    input0Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
+    armnn::IConnectableLayer* elementwiseUnaryLayer0 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseUnary_0");
+    elementwiseUnaryLayer0->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
+    input0Layer->GetOutputSlot(0).Connect(elementwiseUnaryLayer0->GetInputSlot(0));
+
+    armnn::IConnectableLayer* const input1Layer = net->AddInputLayer(1, "input_1");
+    input1Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
+    armnn::IConnectableLayer* elementwiseUnaryLayer1 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseUnary_1");
+    elementwiseUnaryLayer1->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
+    input1Layer->GetOutputSlot(0).Connect(elementwiseUnaryLayer1->GetInputSlot(0));
+
+    std::array<armnn::TensorShape, 2> concatInputShapes = { intermediateInfo.GetShape(), intermediateInfo.GetShape() };
+    armnn::IConnectableLayer* const concatLayer = net->AddConcatLayer(armnn::CreateDescriptorForConcatenation(
+        concatInputShapes.begin(), concatInputShapes.end(), 2), "concatenation");
+    concatLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
+    elementwiseUnaryLayer0->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(0));
+    elementwiseUnaryLayer1->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(1));
+
+    armnn::IConnectableLayer* const outputLayer = net->AddOutputLayer(0, "output");
+    concatLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
+    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+
+    const armnn::Graph& theGraph = static_cast<armnn::OptimizedNetwork*>(optimizedNet.get())->GetGraph();
+
+    // Load graph into runtime
+    armnn::NetworkId networkIdentifier;
+    runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));
+
+    // Now check how many sub-tensors the concat layer is using.
+    auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
+    {
+        if (subTensorHandle && subTensorHandle->GetParent())
+        {
+            return true;
+        }
+        return false;
+    };
+
+    for (auto&& layer : theGraph)
+    {
+        if(layer->GetType() == armnn::LayerType::Concat)
+        {
+            unsigned int numberOfSubTensors = 0;
+            for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
+            {
+                const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
+                if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
+                {
+                    ++numberOfSubTensors;
+                }
+            }
+            // sub-tensors should be supported in this configuration
+            BOOST_CHECK(numberOfSubTensors > 0);
+        }
+    }
+}
+
 BOOST_AUTO_TEST_SUITE_END()
-- 
cgit v1.2.1
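
Reviewer note: the sketch below restates the gating logic this patch adds to ConcatLayer::CreateTensors as a free-standing function, which may make the sub-tensor eligibility rule easier to follow. It is illustrative only and not part of the change: the helper name CanUseSubTensorOnXorY, its signature and the include paths are assumptions, while the axis check and the PaddingRequired capability query mirror the hunks above.

// Illustrative sketch only (not part of this patch). Restates the condition under
// which the patched ConcatLayer will try to create sub-tensor handles when the
// concat axis is one of the two innermost dimensions (x or y).
// Assumed include paths; they resolve only inside the ArmNN source tree.
#include <armnn/Descriptors.hpp>                    // OriginsDescriptor
#include <Layer.hpp>                                // Layer, OutputSlot (internal header)
#include <backendsCommon/ITensorHandleFactory.hpp>  // ITensorHandleFactory, Capability

#include <cstdint>
#include <vector>

// Hypothetical helper name and signature, for illustration only.
bool CanUseSubTensorOnXorY(const armnn::OriginsDescriptor& concatParams,
                           armnn::ITensorHandleFactory* handleFactory,
                           armnn::Layer* concatLayer)
{
    const uint32_t concatAxis = concatParams.GetConcatAxis();
    const uint32_t numDims    = concatParams.GetNumDimensions();

    // "On x or y" means the concat runs over one of the two innermost dimensions
    // of a tensor with at least three dimensions.
    const bool isConcatOnXorY = (numDims >= 3) &&
        ((concatAxis == numDims - 1) || (concatAxis == numDims - 2));
    if (!isConcatOnXorY)
    {
        return true;    // other axes keep the pre-existing sub-tensor behaviour
    }

    // Sub-tensors are only used when no producer of a concat input reports the
    // PaddingRequired capability: a padded parent cannot be viewed in place.
    for (unsigned int i = 0; i < concatLayer->GetNumInputSlots(); ++i)
    {
        armnn::OutputSlot* slot = concatLayer->GetInputSlot(i).GetConnectedOutputSlot();
        std::vector<armnn::Capability> capabilities =
            handleFactory->GetCapabilities(&(slot->GetOwningLayer()),
                                           concatLayer,
                                           armnn::CapabilityClass::PaddingRequired);
        if (!capabilities.empty())
        {
            return false;
        }
    }
    return true;
}

Under this rule the new Neon test above expects at least one concat input to be backed by a sub-tensor (BOOST_CHECK(numberOfSubTensors > 0)), since its producers do not report the PaddingRequired capability.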