From 0790dcea1056298d63f97dec904c8ade5d21f439 Mon Sep 17 00:00:00 2001 From: Derek Lamberti Date: Mon, 15 Apr 2019 18:37:35 +0100 Subject: IVGCVSW-2957 MergerLayer subtensor optimization now backend agnostic + Update clframework pin + Cl and Neon Merger workloads updated to use MemoryLayout agnostic API + Workloads only use sub-tensor optimization if ALL input tensors are sub-tensors + Refactor LayerSupportCommon code to be a bit more succinct Change-Id: Ib61ad4ccbd767e924dff07e61022e0cda4069828 Signed-off-by: Derek Lamberti --- src/armnn/layers/MergerLayer.cpp | 56 ++++++++++++++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 8 deletions(-) (limited to 'src/armnn/layers') diff --git a/src/armnn/layers/MergerLayer.cpp b/src/armnn/layers/MergerLayer.cpp index f87f34925f..c674f64f3f 100644 --- a/src/armnn/layers/MergerLayer.cpp +++ b/src/armnn/layers/MergerLayer.cpp @@ -36,14 +36,12 @@ std::unique_ptr MergerLayer::CreateWorkload(const Graph& graph, const void MergerLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) { - //If sub tensors are supported than the merger + //If sub tensors are supported then the merger //just needs to make sure that the outputs of the prev layer //are made subtensors of the output of the merger layer. m_OutputHandlers[0].CreateTensorHandles(factory); - unsigned int innerAxis = m_Param.GetNumDimensions() - m_Param.GetConcatAxis(); - - if (factory.SupportsSubTensors() && innerAxis != 1) + if (factory.SupportsSubTensors()) { std::queue m_MergerLayers; @@ -52,23 +50,65 @@ void MergerLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& fact { MergerLayer* currentLayer = m_MergerLayers.front(); ITensorHandle* parentTensor = currentLayer->GetOutputHandler(0).GetData(); - + const TensorInfo& parentInfo = currentLayer->GetOutputHandler(0).GetTensorInfo(); m_MergerLayers.pop(); const unsigned int numInputSlots = currentLayer->GetNumInputSlots(); + + // First go through all the input slots and verify that we can sub-tensor all the inputs. + std::vector> subTensors(0); + subTensors.reserve(numInputSlots); for (unsigned int i = 0; i < numInputSlots; ++i) + { + OutputSlot* slot = currentLayer->GetInputSlot(i).GetConnectedOutputSlot(); + const TensorInfo& info = slot->GetTensorInfo(); + + auto CreateSubTensor = [&]() + { + // Make sure quantization parameters are in the same space + if (parentInfo.IsTypeSpaceMatch(info)) + { + return factory.CreateSubTensorHandle(*parentTensor, + info.GetShape(), + currentLayer->m_Param.GetViewOrigin(i)); + } + return std::unique_ptr(); + }; + + auto subTensor = CreateSubTensor(); + if (!subTensor) + { + break; //Failed to create a valid sub-tensor, so stop trying with the rest of the inputs. + } + else + { + subTensors.push_back(std::move(subTensor)); // store the valid sub-tensor. + } + } + + // Ensure that ALL inputs can be substituted with valid sub-tensors + if (subTensors.size() < numInputSlots) + { + continue; // Don't optimize this Merge layer with sub-tensors + } + + // Substitute input tensors with sub-tensors by replacing the output tensors on the connected layers. + unsigned int i=0; + for (auto& subTensor : subTensors) { OutputSlot* slot = currentLayer->GetInputSlot(i).GetConnectedOutputSlot(); OutputHandler& outputHandler = slot->GetOutputHandler(); - outputHandler.SetData(factory.CreateSubTensorHandle(*parentTensor, - outputHandler.GetTensorInfo().GetShape(), - currentLayer->m_Param.GetViewOrigin(i))); + + BOOST_ASSERT_MSG(subTensor, "MergerLayer: Expected a valid sub-tensor for substitution."); + outputHandler.SetData(std::move(subTensor)); Layer& inputLayer = slot->GetOwningLayer(); if (inputLayer.GetType() == LayerType::Merger) { + // Continue with the substitution if the connected inputs are also merger layers m_MergerLayers.push(boost::polymorphic_downcast(&inputLayer)); } + ++i; } } } -- cgit v1.2.1