From d4fa5456ba596c9fc5e2ab7de836c5157aa7a8f8 Mon Sep 17 00:00:00 2001
From: Finn Williams
Date: Mon, 1 Mar 2021 12:31:41 +0000
Subject: IVGCVSW-5741 Update FullyConnected in TfLiteParser to support
 NonConstWeights

!armnn:5180

* Remove unnecessary memcpy for non-permuted const tensors

Signed-off-by: Finn Williams
Change-Id: Idc3ce2ac001e7d6be61819279de486f093730383
---
 src/armnnTfLiteParser/TfLiteParser.cpp        | 162 +++++++++++++----------
 src/armnnTfLiteParser/TfLiteParser.hpp        |  16 ++-
 src/armnnTfLiteParser/test/FullyConnected.cpp | 177 +++++++++++++++++++++++++-
 3 files changed, 275 insertions(+), 80 deletions(-)
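Note on the API shape this patch targets: AddFullyConnectedLayer now takes the weights and bias as Optional<ConstTensor>, so the parser can build a fully connected layer whether or not the weights are known at parse time. A minimal sketch of that call using the public armnn headers; the helper name and the "fc" layer name are illustrative, not part of this patch:

    #include <armnn/INetwork.hpp>
    #include <armnn/Descriptors.hpp>
    #include <armnn/Optional.hpp>

    // Build a FullyConnected layer whose weights may or may not be constant.
    armnn::IConnectableLayer* AddFullyConnected(armnn::INetwork& network,
                                                const armnn::Optional<armnn::ConstTensor>& weights,
                                                const armnn::Optional<armnn::ConstTensor>& biases)
    {
        armnn::FullyConnectedDescriptor desc;
        desc.m_BiasEnabled     = biases.has_value();
        desc.m_ConstantWeights = weights.has_value();

        // With non-constant weights the layer reads them from input slot 1
        // (and the bias from slot 2) at inference time, instead of from
        // baked-in ConstTensors.
        return network.AddFullyConnectedLayer(desc, weights, biases, "fc");
    }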
diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp
index ab32ef7822..8286007b04 100644
--- a/src/armnnTfLiteParser/TfLiteParser.cpp
+++ b/src/armnnTfLiteParser/TfLiteParser.cpp
@@ -799,7 +799,7 @@ void TfLiteParserImpl::RegisterConsumerOfTensor(size_t subgraphIndex,
     ARMNN_ASSERT(m_SubgraphConnections.size() > subgraphIndex);
     ARMNN_ASSERT(m_SubgraphConnections[subgraphIndex].size() > tensorIndex);
 
-    TensorSlots & tensorSlots = m_SubgraphConnections[subgraphIndex][tensorIndex];
+    TensorSlots& tensorSlots = m_SubgraphConnections[subgraphIndex][tensorIndex];
 
     tensorSlots.inputSlots.push_back(slot);
 }
@@ -914,9 +914,7 @@ void TfLiteParserImpl::ParseConv2D(size_t subgraphIndex, size_t operatorIndex)
     CalcPadding(inputWidth, filterWidth, desc.m_StrideX,
                 desc.m_DilationX, desc.m_PadLeft, desc.m_PadRight, options->padding);
 
-    auto filterTensorAndData = CreateConstTensor(inputs[1],
-                                                 filterTensorInfo,
-                                                 armnn::Optional<armnn::PermutationVector&>());
+    auto filterTensorAndData = CreateConstTensorNonPermuted(inputs[1], filterTensorInfo);
     armnn::IConnectableLayer* layer = nullptr;
 
     auto layerName = fmt::format("Conv2D:{}:{}", subgraphIndex, operatorIndex);
@@ -925,18 +923,16 @@ void TfLiteParserImpl::ParseConv2D(size_t subgraphIndex, size_t operatorIndex)
     {
         desc.m_BiasEnabled = true;
         armnn::TensorInfo biasTensorInfo = ToTensorInfo(inputs[2]);
-        auto biasTensorAndData = CreateConstTensor(inputs[2],
-                                                   biasTensorInfo,
-                                                   armnn::Optional<armnn::PermutationVector&>());
+        auto biasTensorAndData = CreateConstTensorNonPermuted(inputs[2], biasTensorInfo);
         layer = m_Network->AddConvolution2dLayer(desc,
-                                                 filterTensorAndData.first,
-                                                 Optional<ConstTensor>(biasTensorAndData.first),
+                                                 filterTensorAndData,
+                                                 Optional<ConstTensor>(biasTensorAndData),
                                                  layerName.c_str());
     }
     else
     {
         layer = m_Network->AddConvolution2dLayer(desc,
-                                                 filterTensorAndData.first,
+                                                 filterTensorAndData,
                                                  EmptyOptional(),
                                                  layerName.c_str());
     }
@@ -1005,7 +1001,7 @@ void TfLiteParserImpl::ParseDepthwiseConv2D(size_t subgraphIndex, size_t operato
     CalcPadding(inputWidth, filterWidth, desc.m_StrideX,
                 desc.m_DilationX, desc.m_PadLeft, desc.m_PadRight, options->padding);
 
-    auto filterTensorAndData = CreateConstTensor(inputs[1], filterTensorInfo, permutationVector);
+    auto filterTensorAndData = CreateConstTensorPermuted(inputs[1], filterTensorInfo, permutationVector);
     armnn::IConnectableLayer* layer = nullptr;
     auto layerName = fmt::format("DepthwiseConv2D:{}:{}", subgraphIndex, operatorIndex);
 
@@ -1013,12 +1009,10 @@ void TfLiteParserImpl::ParseDepthwiseConv2D(size_t subgraphIndex, size_t operato
     {
         desc.m_BiasEnabled = true;
         TensorInfo biasTensorInfo = ToTensorInfo(inputs[2]);
-        auto biasTensorAndData = CreateConstTensor(inputs[2],
-                                                   biasTensorInfo,
-                                                   armnn::Optional<armnn::PermutationVector&>());
+        auto biasTensorAndData = CreateConstTensorNonPermuted(inputs[2], biasTensorInfo);
         layer = m_Network->AddDepthwiseConvolution2dLayer(desc,
                                                           filterTensorAndData.first,
-                                                          Optional<ConstTensor>(biasTensorAndData.first),
+                                                          Optional<ConstTensor>(biasTensorAndData),
                                                           layerName.c_str());
     }
     else
@@ -1210,9 +1204,7 @@ void TfLiteParserImpl::ParseTransposeConv(size_t subgraphIndex, size_t operatorI
                 desc.m_PadRight, options->padding);
 
-    auto filterTensorAndData = CreateConstTensor(inputs[1],
-                                                 filterTensorInfo,
-                                                 armnn::Optional<armnn::PermutationVector&>());
+    auto filterTensorAndData = CreateConstTensorNonPermuted(inputs[1], filterTensorInfo);
 
     armnn::IConnectableLayer* layer = nullptr;
     auto layerName = fmt::format("TransposeConv:{}:{}", subgraphIndex, operatorIndex);
 
@@ -1220,18 +1212,16 @@ void TfLiteParserImpl::ParseTransposeConv(size_t subgraphIndex, size_t operatorI
     if (desc.m_BiasEnabled)
    {
         auto biasTensorInfo = ToTensorInfo(inputs[3]);
-        auto biasConstTensor = CreateConstTensor(inputs[3],
-                                                 biasTensorInfo,
-                                                 armnn::Optional<armnn::PermutationVector&>());
+        auto biasConstTensor = CreateConstTensorNonPermuted(inputs[3], biasTensorInfo);
         layer = m_Network->AddTransposeConvolution2dLayer(desc,
-                                                          filterTensorAndData.first,
-                                                          biasConstTensor.first,
+                                                          filterTensorAndData,
+                                                          biasConstTensor,
                                                           layerName.c_str());
     }
     else
     {
         layer = m_Network->AddTransposeConvolution2dLayer(desc,
-                                                          filterTensorAndData.first,
+                                                          filterTensorAndData,
                                                           EmptyOptional(),
                                                           layerName.c_str());
     }
@@ -2400,37 +2390,59 @@ void TfLiteParserImpl::ParseFullyConnected(size_t subgraphIndex, size_t operator
                 CHECK_LOCATION().AsString()));
     }
 
-    auto filterTensorAndData = CreateConstTensor(inputs[1],
-                                                 filterTensorInfo,
-                                                 armnn::Optional<armnn::PermutationVector&>());
     armnn::IConnectableLayer* layer = nullptr;
     auto layerName = fmt::format("FullyConnected:{}:{}", subgraphIndex, operatorIndex);
 
-    if (inputs.size() == 3)
+    Optional<ConstTensor> filterOptionalConstTensor;
+
+    desc.m_ConstantWeights = IsConstTensor(inputs[1]);
+
+    // Either both weights and biases need to be inputs or both weights and biases need to be constant
+    if (inputs.size() == 3 && desc.m_ConstantWeights != IsConstTensor(inputs[2]))
     {
-        desc.m_BiasEnabled = true;
-        TensorInfo biasTensorInfo = ToTensorInfo(inputs[2]);
-        auto biasTensorAndData = CreateConstTensor(inputs[2],
-                                                   biasTensorInfo,
-                                                   armnn::Optional<armnn::PermutationVector&>());
-        layer = m_Network->AddFullyConnectedLayer(desc,
-                                                  filterTensorAndData.first,
-                                                  Optional<ConstTensor>(biasTensorAndData.first),
-                                                  layerName.c_str());
+ "Node {}", + CHECK_LOCATION().AsString())); + } + + auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex)); + std::vector tensorIndexesToRegister = {inputTensorIndexes[0]}; + if (desc.m_ConstantWeights) + { + filterOptionalConstTensor = Optional(CreateConstTensorNonPermuted(inputs[1], filterTensorInfo)); } else { - layer = m_Network->AddFullyConnectedLayer(desc, - filterTensorAndData.first, - EmptyOptional(), - layerName.c_str()); + // Non const weights will need to be registered as inputs + tensorIndexesToRegister.emplace_back(inputTensorIndexes[1]); } - ARMNN_ASSERT(layer != nullptr); - armnn::TensorInfo inputTensorInfo = ToTensorInfo(inputs[0]); + Optional biasOptionalConstTensor; + if (inputs.size() == 3) + { + desc.m_BiasEnabled = true; + if (desc.m_ConstantWeights) + { + TensorInfo biasTensorInfo = ToTensorInfo(inputs[2]); + biasOptionalConstTensor = Optional(CreateConstTensorNonPermuted(inputs[2], biasTensorInfo)); + } + else + { + // Non const biases will need to be registered as inputs + tensorIndexesToRegister.emplace_back(inputTensorIndexes[2]); + } + } - auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex)); + layer = m_Network->AddFullyConnectedLayer(desc, + filterOptionalConstTensor, + biasOptionalConstTensor, + layerName.c_str()); + + ARMNN_ASSERT(layer != nullptr); + armnn::TensorInfo inputTensorInfo = ToTensorInfo(inputs[0]); + unsigned int startingSlotIndex = 0; if (inputTensorInfo.GetNumDimensions() > 2) { // Add reshape to flatten to 2D [batch_size, input_size], @@ -2453,21 +2465,20 @@ void TfLiteParserImpl::ParseFullyConnected(size_t subgraphIndex, size_t operator reshapedTensorInfo.SetShape(armnn::TensorShape{ 2, reshapedDimensions.data() }); std::string reshapeLayerName = fmt::format("Reshape_for:{}", layer->GetName()); - armnn::ReshapeDescriptor desc; - desc.m_TargetShape = reshapedTensorInfo.GetShape(); - armnn::IConnectableLayer* reshapeLayer = m_Network->AddReshapeLayer(desc, layerName.c_str()); + armnn::ReshapeDescriptor reshapeDescriptor; + reshapeDescriptor.m_TargetShape = reshapedTensorInfo.GetShape(); + armnn::IConnectableLayer* reshapeLayer = m_Network->AddReshapeLayer(reshapeDescriptor, layerName.c_str()); reshapeLayer->GetOutputSlot(0).SetTensorInfo(reshapedTensorInfo); reshapeLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); RegisterInputSlots(subgraphIndex, operatorIndex, reshapeLayer, {inputTensorIndexes[0]}); + // Fc layer connects to the reshape layer, so we skip the first input slot when registering fc's input slots + tensorIndexesToRegister.erase(tensorIndexesToRegister.begin()); + startingSlotIndex = 1; } - else - { - // register the input connection slot for the layer - // only the tensors for the inputs are relevant, exclude the const tensors - RegisterInputSlots(subgraphIndex, operatorIndex, layer, {inputTensorIndexes[0]}); - } + + RegisterInputSlots(subgraphIndex, operatorIndex, layer, tensorIndexesToRegister, startingSlotIndex); armnn::TensorInfo outputTensorInfo = ToTensorInfo(outputs[0], true); layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); @@ -2523,11 +2534,10 @@ void TfLiteParserImpl::ParseDetectionPostProcess(size_t subgraphIndex, size_t op } armnn::TensorInfo anchorTensorInfo = ToTensorInfo(inputs[2]); - auto anchorTensorAndData = CreateConstTensor(inputs[2], anchorTensorInfo, - armnn::Optional()); + auto anchorTensorAndData = CreateConstTensorNonPermuted(inputs[2], anchorTensorInfo); auto layerName = 
fmt::format("DetectionPostProcess:{}:{}", subgraphIndex, operatorIndex); - IConnectableLayer* layer = m_Network->AddDetectionPostProcessLayer(desc, anchorTensorAndData.first, + IConnectableLayer* layer = m_Network->AddDetectionPostProcessLayer(desc, anchorTensorAndData, layerName.c_str()); ARMNN_ASSERT(layer != nullptr); @@ -3335,11 +3345,12 @@ std::vector& TfLiteParserImpl::GetOutputTensorIds(const ModelPtr& model void TfLiteParserImpl::RegisterInputSlots(size_t subgraphIndex, size_t operatorIndex, IConnectableLayer* layer, - const std::vector& tensorIndexes) + const std::vector& tensorIndexes, + unsigned int startingSlotIndex) { CHECK_MODEL(m_Model, subgraphIndex, operatorIndex); ARMNN_ASSERT(layer != nullptr); - if (tensorIndexes.size() != layer->GetNumInputSlots()) + if (tensorIndexes.size() + startingSlotIndex != layer->GetNumInputSlots()) { throw ParseException( fmt::format("The number of tensor inputs ({}) does not match the number expected ({})" @@ -3351,10 +3362,10 @@ void TfLiteParserImpl::RegisterInputSlots(size_t subgraphIndex, CHECK_LOCATION().AsString())); } - for (unsigned int slotIndex = 0; slotIndex < layer->GetNumInputSlots(); ++slotIndex) + for (unsigned int index = 0; index < tensorIndexes.size() ; ++index) { - unsigned int tensorIndex = tensorIndexes[slotIndex]; - armnn::IInputSlot* slot = &(layer->GetInputSlot(slotIndex)); + unsigned int tensorIndex = tensorIndexes[index]; + armnn::IInputSlot* slot = &(layer->GetInputSlot(startingSlotIndex + index)); RegisterConsumerOfTensor(subgraphIndex, tensorIndex, slot); } } @@ -3439,13 +3450,11 @@ void TfLiteParserImpl::SetupConstantLayers(size_t subgraphIndex) { TensorRawPtr tensorPtr = subgraphPtr->tensors[tensorIndex].get(); armnn::TensorInfo tensorInfo = ToTensorInfo(tensorPtr); - auto tensorAndData = CreateConstTensor(tensorPtr, - tensorInfo, - armnn::Optional()); + auto tensorAndData = CreateConstTensorNonPermuted(tensorPtr, tensorInfo); std::string layerName = fmt::format("Constant:{}", tensorPtr->name); IConnectableLayer *layer = - m_Network->AddConstantLayer(tensorAndData.first, layerName.c_str()); + m_Network->AddConstantLayer(tensorAndData, layerName.c_str()); layer->GetOutputSlot(0).SetTensorInfo(tensorInfo); RegisterOutputSlots(subgraphIndex, @@ -3480,10 +3489,17 @@ TfLiteParserImpl::CreateConstTensorAndStoreData(TfLiteParserImpl::BufferRawPtr b return std::make_pair(constData.first, std::move(storage)); } +bool TfLiteParserImpl::IsConstTensor(TensorRawPtr tensorPtr) +{ + CHECK_TENSOR_PTR(tensorPtr); + return !tensorPtr->is_variable; +} + + std::pair -TfLiteParserImpl::CreateConstTensor(TensorRawPtr tensorPtr, - armnn::TensorInfo& tensorInfo, - armnn::Optional permutationVector) +TfLiteParserImpl::CreateConstTensorPermuted(TensorRawPtr tensorPtr, + armnn::TensorInfo& tensorInfo, + armnn::Optional permutationVector) { CHECK_TENSOR_PTR(tensorPtr); auto bufferPtr = GetBuffer(m_Model, tensorPtr->buffer); @@ -3528,6 +3544,16 @@ TfLiteParserImpl::CreateConstTensor(TensorRawPtr tensorPtr, } } +armnn::ConstTensor TfLiteParserImpl::CreateConstTensorNonPermuted(TensorRawPtr tensorPtr, + armnn::TensorInfo& tensorInfo) +{ + CHECK_TENSOR_PTR(tensorPtr); + auto bufferPtr = GetBuffer(m_Model, tensorPtr->buffer); + CHECK_BUFFER_SIZE(bufferPtr, tensorInfo, tensorPtr->buffer); + + return ConstTensor(tensorInfo, bufferPtr->data.data()); +} + BindingPointInfo TfLiteParserImpl::GetNetworkInputBindingInfo(size_t subgraphId, const std::string& name) const { diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp 
diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp
index b59571e7c3..07ff4816ea 100644
--- a/src/armnnTfLiteParser/TfLiteParser.hpp
+++ b/src/armnnTfLiteParser/TfLiteParser.hpp
@@ -83,6 +83,7 @@ public:
     static const std::string GetVersion();
 
 private:
+    // No copying allowed until it is wanted and properly implemented
     TfLiteParserImpl(const TfLiteParserImpl &) = delete;
     TfLiteParserImpl & operator=(const TfLiteParserImpl &) = delete;
 
@@ -154,7 +155,8 @@ private:
     void RegisterInputSlots(size_t subgraphIndex,
                             size_t operatorIndex,
                             armnn::IConnectableLayer* layer,
-                            const std::vector<unsigned int>& tensorIndexes);
+                            const std::vector<unsigned int>& tensorIndexes,
+                            unsigned int startingSlotIndex = 0);
     void RegisterOutputSlots(size_t subgraphIndex,
                              size_t operatorIndex,
                              armnn::IConnectableLayer* layer,
@@ -194,6 +196,13 @@ private:
         std::unique_ptr<int32_t[]> m_Int32Data;
     };
 
+    bool IsConstTensor(TensorRawPtr tensorPtr);
+    armnn::ConstTensor CreateConstTensorNonPermuted(TensorRawPtr tensorPtr,
+                                                    armnn::TensorInfo& tensorInfo);
+    std::pair<armnn::ConstTensor, SupportedDataStorage>
+    CreateConstTensorPermuted(TensorRawPtr tensorPtr,
+                              armnn::TensorInfo& tensorInfo,
+                              armnn::Optional<armnn::PermutationVector&> permutationVector);
 
     template<typename T>
     std::pair<armnn::ConstTensor, std::unique_ptr<T[]>>
@@ -202,11 +211,6 @@ private:
     CreateConstTensorAndStoreData(BufferRawPtr bufferPtr,
                                   TensorRawPtr tensorPtr,
                                   armnn::TensorInfo& tensorInfo,
                                   armnn::Optional<armnn::PermutationVector&> permutationVector);
 
-    std::pair<armnn::ConstTensor, SupportedDataStorage>
-    CreateConstTensor(TensorRawPtr tensorPtr,
-                      armnn::TensorInfo& tensorInfo,
-                      armnn::Optional<armnn::PermutationVector&> permutationVector);
-
     // Settings for configuring the TfLiteParser
     armnn::Optional<ITfLiteParser::TfLiteParserOptions> m_Options;
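IsConstTensor keys off the TfLite schema's is_variable flag, which is also what the new test fixture below flips to drive the non-const path (its weights and bias tensors are declared with "is_variable": true). A sketch of the check with a hypothetical stand-in for the flatbuffer-generated tflite::TensorT type:

    // Stand-in for the generated tflite::TensorT; only the relevant field is shown.
    struct TensorT
    {
        bool is_variable = false;
    };

    // TfLite marks mutable (read-write) tensors as variable, so anything
    // not variable is treated as a constant tensor by the parser.
    bool IsConstTensor(const TensorT* tensorPtr)
    {
        return !tensorPtr->is_variable;
    }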
"quantized_dimension": 0 + }, + "is_variable": false + }, + { + "shape": )" + filterShape + R"(, + "type": "INT8", + "buffer": 1, + "name": "weights", + "quantization": { + "scale": [ 1.0 ], + "zero_point": [ 0 ], + "details_type": 0, + "quantized_dimension": 0 + }, + "is_variable": true + }, + )" + biasTensor + R"( + { + "shape": )" + outputShape + R"(, + "type": "INT8", + "buffer": 0, + "name": "output", + "quantization": { + "scale": [ + 2.0 + ], + "zero_point": [ + 0 + ], + "details_type": 0, + "quantized_dimension": 0 + }, + "is_variable": false + } + ], + "inputs": )" + inputTensors + R"(, + "outputs": [ )" + outputs + R"( ], + "operators": [ + { + "opcode_index": 0, + "inputs": )" + inputTensors + R"(, + "outputs": [ )" + outputs + R"( ], + "builtin_options_type": "FullyConnectedOptions", + "builtin_options": { + "fused_activation_function": "NONE", + "weights_format": "DEFAULT", + "keep_num_dims": false, + "asymmetric_quantize_inputs": false + }, + "custom_options_format": "FLEXBUFFERS" + } + ] + } + ], + "description": "ArmnnDelegate: FullyConnected Operator Model", + "buffers": [ + { + "data": [] + }, + { + "data": [ 2, 3, 4, 5 ] + } + )" + biasBuffer + R"( + ] + } + )"; + Setup(); + } +}; + +struct FullyConnectedNonConstWeights : FullyConnectedNonConstWeightsFixture +{ + FullyConnectedNonConstWeights() + : FullyConnectedNonConstWeightsFixture("[ 1, 4, 1, 1 ]", // inputShape + "[ 1, 1 ]", // outputShape + "[ 1, 4 ]", // filterShape + "[ 1 ]" ) // biasShape + + {} +}; + +BOOST_FIXTURE_TEST_CASE(ParseFullyConnectedNonConstWeights, FullyConnectedNonConstWeights) +{ + RunTest<2, armnn::DataType::QAsymmS8, + armnn::DataType::Signed32, + armnn::DataType::QAsymmS8>( + 0, + {{{"input_0", { 1, 2, 3, 4 }},{"weights", { 2, 3, 4, 5 }}}}, + {{"bias", { 10 }}}, + {{"output", { 25 }}}); +} + +struct FullyConnectedNonConstWeightsNoBias : FullyConnectedNonConstWeightsFixture +{ + FullyConnectedNonConstWeightsNoBias() + : FullyConnectedNonConstWeightsFixture("[ 1, 4, 1, 1 ]", // inputShape + "[ 1, 1 ]", // outputShape + "[ 1, 4 ]") // filterShape + + {} +}; + +BOOST_FIXTURE_TEST_CASE(ParseFullyConnectedNonConstWeightsNoBias, FullyConnectedNonConstWeightsNoBias) +{ + RunTest<2, armnn::DataType::QAsymmS8, + armnn::DataType::QAsymmS8>( + 0, + {{{"input_0", { 1, 2, 3, 4 }},{"weights", { 2, 3, 4, 5 }}}}, + {{"output", { 20 }}}); +} + BOOST_AUTO_TEST_SUITE_END() -- cgit v1.2.1