From 080d45d73c03830cb80b223fd64c546e84d8337a Mon Sep 17 00:00:00 2001
From: Mike Kelly
Date: Fri, 10 Nov 2023 17:11:53 +0000
Subject: MLCE-1138 Issue with Delegate supporting FP16 models

* Fixed issue where backends were asked to support FP16 layers that would
  be optimized out.
* Fixed issue where backends were asked to support non-constant filter and
  bias tensors when those tensors would be replaced by constant tensors
  during optimization.

Signed-off-by: Mike Kelly
Change-Id: Ib54b9cb99d5014e27172841a665daf57d1d5b23d
---
 delegate/classic/src/Convolution.hpp     |  53 ++++++++++++++--
 delegate/classic/src/Quantization.hpp    |  27 +++++----
 delegate/classic/src/SharedFunctions.cpp |  77 ++++++++++++++++++++++++
 delegate/classic/src/SharedFunctions.hpp |   6 ++
 delegate/opaque/src/Convolution.hpp      |  51 ++++++++++++++--
 delegate/opaque/src/Quantization.hpp     |  26 +++++---
 delegate/opaque/src/SharedFunctions.cpp  | 100 +++++++++++++++++++++++++++++++
 delegate/opaque/src/SharedFunctions.hpp  |   6 ++
 8 files changed, 319 insertions(+), 27 deletions(-)
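Background for reviewers: FP16 models are commonly stored with FP16 constant weights feeding a
DEQUANTIZE node, and the Arm NN optimizer folds a DEQUANTIZE whose input is constant into a single
Constant layer holding the dequantized values. The decision rule the delegate now applies before
querying backend support is sketched below; this is an illustration only, not part of the patch,
and every name in it is hypothetical.

    // Minimal sketch of the rule: a tensor counts as constant for backend
    // support queries if TfLite already marks it constant, or if it is
    // produced by a Dequantize whose own input is constant (the optimizer
    // folds Constant + Dequantize into one Constant layer).
    enum class Producer { None, Dequantize, Other };

    struct TensorDesc
    {
        bool isConstant;              // reported constant by TfLite
        Producer producedBy;          // node writing this tensor, if any
        bool producerInputIsConstant; // constness of that node's first input
    };

    bool WillBeConstantAfterOptimization(const TensorDesc& t)
    {
        return t.isConstant ||
               (t.producedBy == Producer::Dequantize && t.producerInputIsConstant);
    }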
diff --git a/delegate/classic/src/Convolution.hpp b/delegate/classic/src/Convolution.hpp
index cf0134ec1f..71ecd4c97a 100644
--- a/delegate/classic/src/Convolution.hpp
+++ b/delegate/classic/src/Convolution.hpp
@@ -56,7 +56,6 @@ TfLiteStatus VisitConv2dOperator(DelegateData& delegateData,
     {
         return kTfLiteError;
     }
-
     const TfLiteTensor& tfLiteFilterTensor = tfLiteTensors[tfLiteNode->inputs->data[1]];
     if (!IsValid(tfLiteContext, tfLiteFilterTensor, operatorCode, nodeIndex))
     {
@@ -86,6 +85,7 @@ TfLiteStatus VisitConv2dOperator(DelegateData& delegateData,
     if(biasEnabled)
     {
         const TfLiteTensor& tfLiteBiasTensor = tfLiteTensors[tfLiteNode->inputs->data[2]];
+
         if (!IsValid(tfLiteContext, tfLiteBiasTensor, operatorCode, nodeIndex))
         {
             return kTfLiteError;
         }
@@ -115,6 +115,27 @@ TfLiteStatus VisitConv2dOperator(DelegateData& delegateData,
     armnn::BackendId setBackend;
     if (!delegateData.m_Network)
     {
+        bool filterIsConst = filterTensorInfo.IsConstant();
+
+        if (!filterIsConst)
+        {
+            filterIsConst = WillInputBeOptimizedToConst(tfLiteContext, tfLiteNode->inputs->data[1]);
+        }
+        armnn::TensorInfo filterTensorInfoCopy(filterTensorInfo);
+        filterTensorInfoCopy.SetConstant(filterIsConst);
+        armnn::Optional<armnn::TensorInfo> optionalBiasInfoCopy(biasTensorInfo);
+
+        if (biasEnabled)
+        {
+            bool biasIsConst = biasTensorInfo.IsConstant();
+
+            if (!biasIsConst)
+            {
+                biasIsConst = WillInputBeOptimizedToConst(tfLiteContext, tfLiteNode->inputs->data[2]);
+            }
+            optionalBiasInfoCopy.value().SetConstant(biasIsConst);
+        }
+
         bool isSupported = false;
         FORWARD_LAYER_SUPPORT_FUNC("CONV2D",
                                    tfLiteContext,
@@ -125,8 +146,8 @@ TfLiteStatus VisitConv2dOperator(DelegateData& delegateData,
                                    inputTensorInfo,
                                    outputTensorInfo,
                                    descriptor,
-                                   filterTensorInfo,
-                                   optionalBiasInfo);
+                                   filterTensorInfoCopy,
+                                   optionalBiasInfoCopy);
         return isSupported ?
                kTfLiteOk : kTfLiteError;
     }
@@ -480,6 +501,28 @@ TfLiteStatus VisitDepthwiseConv2dOperator(DelegateData& delegateData,
     armnn::BackendId setBackend;
     if (!delegateData.m_Network)
     {
+        bool filterIsConst = filterTensorInfo.IsConstant();
+
+        if (!filterIsConst)
+        {
+            filterIsConst = WillInputBeOptimizedToConst(tfLiteContext, tfLiteNode->inputs->data[1]);
+        }
+        armnn::TensorInfo filterTensorInfoCopy(filterTensorInfo);
+        filterTensorInfoCopy.SetConstant(filterIsConst);
+
+        armnn::Optional<armnn::TensorInfo> optionalBiasInfoCopy(biasTensorInfo);
+
+        if (biasEnabled)
+        {
+            bool biasIsConst = biasTensorInfo.IsConstant();
+
+            if (!biasIsConst)
+            {
+                biasIsConst = WillInputBeOptimizedToConst(tfLiteContext, tfLiteNode->inputs->data[2]);
+            }
+            optionalBiasInfoCopy.value().SetConstant(biasIsConst);
+        }
+
         bool isSupported = false;
         FORWARD_LAYER_SUPPORT_FUNC("DEPTHWISE_CONV2D",
                                    tfLiteContext,
@@ -490,8 +533,8 @@ TfLiteStatus VisitDepthwiseConv2dOperator(DelegateData& delegateData,
                                    inputTensorInfo,
                                    outputTensorInfo,
                                    descriptor,
-                                   filterTensorInfo,
-                                   armnn::Optional<armnn::TensorInfo>(biasTensorInfo));
+                                   filterTensorInfoCopy,
+                                   optionalBiasInfoCopy);
         return isSupported ?
                kTfLiteOk : kTfLiteError;
     }
diff --git a/delegate/classic/src/Quantization.hpp b/delegate/classic/src/Quantization.hpp
index 8291854383..7fcb9c7c44 100644
--- a/delegate/classic/src/Quantization.hpp
+++ b/delegate/classic/src/Quantization.hpp
@@ -23,7 +23,6 @@ TfLiteStatus VisitDequantizeOperator(DelegateData& delegateData,
 {
     TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
     TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
-
     const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
     const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]];
     if (IsDynamicTensor(tfLiteInputTensor))
@@ -34,7 +33,6 @@ TfLiteStatus VisitDequantizeOperator(DelegateData& delegateData,
                                      tfLiteDequantizeOperatorCode, nodeIndex);
         return kTfLiteError;
     }
-
     const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]];
     if (IsDynamicTensor(tfLiteOutputTensor))
     {
@@ -54,14 +52,23 @@ TfLiteStatus VisitDequantizeOperator(DelegateData& delegateData,
     armnn::BackendId setBackend;
     auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
     {
-        FORWARD_LAYER_SUPPORT_FUNC("DEQUANTIZE",
-                                   tfLiteContext,
-                                   IsDequantizeSupported,
-                                   delegateData.m_Backends,
-                                   isSupported,
-                                   setBackend,
-                                   inputTensorInfo,
-                                   outputTensorInfo);
+        // If this is a Dequantize with a Constant input then it will be replaced by a Constant layer containing
+        // the dequantized values during optimization, so there is no need to check backend support for it.
+        if (tflite::IsConstantTensor(&tfLiteInputTensor))
+        {
+            isSupported = true;
+        }
+        else
+        {
+            FORWARD_LAYER_SUPPORT_FUNC("DEQUANTIZE",
+                                       tfLiteContext,
+                                       IsDequantizeSupported,
+                                       delegateData.m_Backends,
+                                       isSupported,
+                                       setBackend,
+                                       inputTensorInfo,
+                                       outputTensorInfo);
+        }
     };
 
     if (!delegateData.m_Network)
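The two convolution hunks above repeat one pattern: copy the cached TensorInfo, mark the copy
constant for the duration of the support query, and leave the original untouched. A condensed
sketch of that pattern follows; it uses only the armnn::TensorInfo API seen above, and
willFoldToConst stands in for the result of WillInputBeOptimizedToConst.

    #include <armnn/Tensor.hpp>

    // Condensed from the hunks above: take a copy so the cached info is never
    // mutated, and mark it constant only for the backend support query.
    armnn::TensorInfo MarkConstForQuery(const armnn::TensorInfo& info, bool willFoldToConst)
    {
        armnn::TensorInfo copy(info);
        copy.SetConstant(info.IsConstant() || willFoldToConst);
        return copy;
    }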
diff --git a/delegate/classic/src/SharedFunctions.cpp b/delegate/classic/src/SharedFunctions.cpp
index bcff3a1dd0..53136b521e 100644
--- a/delegate/classic/src/SharedFunctions.cpp
+++ b/delegate/classic/src/SharedFunctions.cpp
@@ -110,6 +110,83 @@ TfLiteStatus ValidateFusedActivationOperator(DelegateData& delegateData,
     return isSupported ?
            kTfLiteOk : kTfLiteError;
 }
 
+TfLiteNode* GetNodeConnectedToInput(TfLiteContext* tfLiteContext,
+                                    int32_t& connectedIndex,
+                                    int32_t inputIdx)
+{
+    TfLiteIntArray* executionPlan = nullptr;
+    if (tfLiteContext->GetExecutionPlan(tfLiteContext, &executionPlan) != kTfLiteOk)
+    {
+        TF_LITE_KERNEL_LOG(tfLiteContext, "TfLiteArmnnDelegate: Unable to get graph execution plan.");
+        return nullptr;
+    }
+
+    for (int i = 0; i < executionPlan->size; ++i)
+    {
+        connectedIndex = executionPlan->data[i];
+
+        // Walk the execution plan to find the node, if any, that produces the tensor at inputIdx
+        TfLiteNode* connectedNode = nullptr;
+        TfLiteRegistration* tfLiteRegistration = nullptr;
+        if (tfLiteContext->GetNodeAndRegistration(
+                tfLiteContext, connectedIndex, &connectedNode, &tfLiteRegistration) != kTfLiteOk)
+        {
+            TF_LITE_KERNEL_LOG(tfLiteContext,
+                               "TfLiteArmnnDelegate: Unable to get node and registration for node %d.",
+                               connectedIndex);
+            continue;
+        }
+        for (int j = 0; j < connectedNode->outputs->size; ++j)
+        {
+            if (connectedNode->outputs->data[j] == inputIdx)
+            {
+                return connectedNode;
+            }
+        }
+    }
+    // No node found so set connectedIndex to -1
+    connectedIndex = -1;
+    return nullptr;
+}
+
+bool WillInputBeOptimizedToConst(TfLiteContext* tfLiteContext, int32_t inputIdx)
+{
+    int32_t connectedIndex;
+    TfLiteNode* connectedNode = GetNodeConnectedToInput(tfLiteContext, connectedIndex, inputIdx);
+
+    if (connectedNode)
+    {
+        TfLiteRegistration* tfLiteRegistration = nullptr;
+
+        if (tfLiteContext->GetNodeAndRegistration(tfLiteContext, connectedIndex, &connectedNode, &tfLiteRegistration)
+            == kTfLiteOk)
+        {
+            switch (tfLiteRegistration->builtin_code)
+            {
+                case kTfLiteBuiltinDequantize:
+                {
+                    if (connectedNode->inputs->size >= 1)
+                    {
+                        const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
+                        const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[connectedNode->inputs->data[0]];
+
+                        // If the input to the Dequantize is a Constant then both that Constant layer and the Dequantize
+                        // layer will be replaced by a single Constant layer containing the dequantized values.
+                        if (tflite::IsConstantTensor(&tfLiteInputTensor))
+                        {
+                            return true;
+                        }
+                    }
+                    break;
+                }
+                default:
+                {
+                }
+            }
+        }
+    }
+    return false;
+}
 
 } // namespace armnnDelegate
diff --git a/delegate/classic/src/SharedFunctions.hpp b/delegate/classic/src/SharedFunctions.hpp
index b03a63ded9..78aa3fef0e 100644
--- a/delegate/classic/src/SharedFunctions.hpp
+++ b/delegate/classic/src/SharedFunctions.hpp
@@ -21,5 +21,11 @@ TfLiteStatus ValidateFusedActivationOperator(DelegateData& delegateData,
                                              const armnn::TensorInfo& outputInfo,
                                              TfLiteFusedActivation activationType);
 
+TfLiteNode* GetNodeConnectedToInput(TfLiteContext* tfLiteContext,
+                                    int32_t& connectedIndex,
+                                    int32_t inputIdx);
+
+bool WillInputBeOptimizedToConst(TfLiteContext* tfLiteContext, int32_t inputIdx);
+
 } // namespace armnnDelegate
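A hypothetical caller, mirroring how the convolution visitors above combine the two new helpers
for a Conv2d filter input; this wrapper is illustrative only and is not part of the patch.

    // The filter is treated as constant if TfLite already says so, or if its
    // producer is a Dequantize-of-a-Constant that optimization will fold away.
    bool FilterWillBeConst(TfLiteContext* tfLiteContext,
                           TfLiteNode* tfLiteNode,
                           const armnn::TensorInfo& filterTensorInfo)
    {
        return filterTensorInfo.IsConstant() ||
               armnnDelegate::WillInputBeOptimizedToConst(tfLiteContext,
                                                          tfLiteNode->inputs->data[1]);
    }

Note that GetNodeConnectedToInput is a linear scan of the execution plan per query, but it runs
only while the delegate is deciding which nodes to claim, so the cost is paid once per node at
graph preparation time rather than per inference.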
diff --git a/delegate/opaque/src/Convolution.hpp b/delegate/opaque/src/Convolution.hpp
index 384c62b678..e4393e7bb0 100644
--- a/delegate/opaque/src/Convolution.hpp
+++ b/delegate/opaque/src/Convolution.hpp
@@ -138,6 +138,27 @@ TfLiteStatus VisitConv2dOperator(DelegateData& delegateData,
     armnn::BackendId setBackend;
     if (!delegateData.m_Network)
     {
+        bool filterIsConst = filterTensorInfo.IsConstant();
+
+        if (!filterIsConst)
+        {
+            filterIsConst = WillInputBeOptimizedToConst(tfLiteContext, inputTensors[1]);
+        }
+        armnn::TensorInfo filterTensorInfoCopy(filterTensorInfo);
+        filterTensorInfoCopy.SetConstant(filterIsConst);
+        armnn::Optional<armnn::TensorInfo> optionalBiasInfoCopy(biasTensorInfo);
+
+        if (biasEnabled)
+        {
+            bool biasIsConst = biasTensorInfo.IsConstant();
+
+            if (!biasIsConst)
+            {
+                biasIsConst = WillInputBeOptimizedToConst(tfLiteContext, inputTensors[2]);
+            }
+            optionalBiasInfoCopy.value().SetConstant(biasIsConst);
+        }
+
         bool isSupported = false;
         FORWARD_LAYER_OPAQUE_SUPPORT_FUNC("CONV2D",
                                           tfLiteContext,
@@ -148,8 +169,8 @@ TfLiteStatus VisitConv2dOperator(DelegateData& delegateData,
                                           inputTensorInfo,
                                           outputTensorInfo,
                                           descriptor,
-                                          filterTensorInfo,
-                                          optionalBiasInfo);
+                                          filterTensorInfoCopy,
+                                          optionalBiasInfoCopy);
         return isSupported ?
                kTfLiteOk : kTfLiteError;
     }
@@ -339,6 +360,28 @@ TfLiteStatus VisitDepthwiseConv2dOperator(DelegateData& delegateData,
     armnn::BackendId setBackend;
     if (!delegateData.m_Network)
     {
+        bool filterIsConst = filterTensorInfo.IsConstant();
+
+        if (!filterIsConst)
+        {
+            filterIsConst = WillInputBeOptimizedToConst(tfLiteContext, inputTensors[1]);
+        }
+        armnn::TensorInfo filterTensorInfoCopy(filterTensorInfo);
+        filterTensorInfoCopy.SetConstant(filterIsConst);
+
+        armnn::Optional<armnn::TensorInfo> optionalBiasInfoCopy(biasTensorInfo);
+
+        if (biasEnabled)
+        {
+            bool biasIsConst = biasTensorInfo.IsConstant();
+
+            if (!biasIsConst)
+            {
+                biasIsConst = WillInputBeOptimizedToConst(tfLiteContext, inputTensors[2]);
+            }
+            optionalBiasInfoCopy.value().SetConstant(biasIsConst);
+        }
+
         bool isSupported = false;
         FORWARD_LAYER_OPAQUE_SUPPORT_FUNC("DEPTHWISE_CONV2D",
                                           tfLiteContext,
@@ -349,8 +392,8 @@ TfLiteStatus VisitDepthwiseConv2dOperator(DelegateData& delegateData,
                                           inputTensorInfo,
                                           outputTensorInfo,
                                           descriptor,
-                                          filterTensorInfo,
-                                          armnn::Optional<armnn::TensorInfo>(biasTensorInfo));
+                                          filterTensorInfoCopy,
+                                          optionalBiasInfoCopy);
         return isSupported ?
                kTfLiteOk : kTfLiteError;
     }
diff --git a/delegate/opaque/src/Quantization.hpp b/delegate/opaque/src/Quantization.hpp
index d7f5c5c73f..e2e5f7618d 100644
--- a/delegate/opaque/src/Quantization.hpp
+++ b/delegate/opaque/src/Quantization.hpp
@@ -31,6 +31,7 @@ TfLiteStatus VisitDequantizeOperator(DelegateData& delegateData,
     }
 
     const TfLiteOpaqueTensor* tfLiteInputTensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, inputTensors[0]);
+
     if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex))
     {
         return kTfLiteError;
@@ -63,14 +64,23 @@ TfLiteStatus VisitDequantizeOperator(DelegateData& delegateData,
     armnn::BackendId setBackend;
     auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
     {
-        FORWARD_LAYER_OPAQUE_SUPPORT_FUNC("DEQUANTIZE",
-                                          tfLiteContext,
-                                          IsDequantizeSupported,
-                                          delegateData.m_Backends,
-                                          isSupported,
-                                          setBackend,
-                                          inputTensorInfo,
-                                          outputTensorInfo);
+        // If this is a Dequantize with a Constant input then it will be replaced by a Constant layer containing
+        // the dequantized values during optimization, so there is no need to check backend support for it.
+        if (IsConstantTensor(tfLiteInputTensor))
+        {
+            isSupported = true;
+        }
+        else
+        {
+            FORWARD_LAYER_OPAQUE_SUPPORT_FUNC("DEQUANTIZE",
+                                              tfLiteContext,
+                                              IsDequantizeSupported,
+                                              delegateData.m_Backends,
+                                              isSupported,
+                                              setBackend,
+                                              inputTensorInfo,
+                                              outputTensorInfo);
+        }
     };
 
     if (!delegateData.m_Network)
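The opaque variant in the next diff cannot dereference the node struct directly the way the
classic delegate does; node inputs and outputs are reachable only through the TfLite C accessor
functions. A minimal sketch of that access pattern, with error logging elided and the helper
name invented for illustration:

    #include <tensorflow/lite/c/c_api_opaque.h>

    // Classic API: connectedNode->outputs->data[j] can be read directly.
    // Opaque API: the node struct is hidden, so accessors must be used.
    bool NodeProducesTensor(TfLiteOpaqueNode* node, int tensorIdx)
    {
        const int* outputTensors = nullptr;
        int numOutputs = 0;
        if (TfLiteOpaqueNodeOutputs(node, &outputTensors, &numOutputs) != kTfLiteOk)
        {
            return false;
        }
        for (int j = 0; j < numOutputs; ++j)
        {
            if (outputTensors[j] == tensorIdx)
            {
                return true;
            }
        }
        return false;
    }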
diff --git a/delegate/opaque/src/SharedFunctions.cpp b/delegate/opaque/src/SharedFunctions.cpp
index 93eb143bd0..0a0c630697 100644
--- a/delegate/opaque/src/SharedFunctions.cpp
+++ b/delegate/opaque/src/SharedFunctions.cpp
@@ -100,5 +100,105 @@ TfLiteStatus ValidateFusedActivationOperator(DelegateData& delegateData,
     return isSupported ?
            kTfLiteOk : kTfLiteError;
 }
 
+TfLiteOpaqueNode* GetNodeConnectedToInput(TfLiteOpaqueContext* tfLiteContext,
+                                          int32_t& connectedIndex,
+                                          int32_t inputIdx)
+{
+    TfLiteIntArray* executionPlan = nullptr;
+    if (TfLiteOpaqueContextGetExecutionPlan(tfLiteContext, &executionPlan) != kTfLiteOk)
+    {
+        TF_LITE_OPAQUE_MAYBE_KERNEL_LOG(tfLiteContext,
+                                        "TfLiteArmnnOpaqueDelegate: Unable to get graph execution plan.");
+        return nullptr;
+    }
+
+    for (int i = 0; i < executionPlan->size; ++i)
+    {
+        connectedIndex = executionPlan->data[i];
+
+        // Walk the execution plan to find the node, if any, that produces the tensor at inputIdx
+        TfLiteOpaqueNode* connectedNode = nullptr;
+        TfLiteRegistrationExternal* tfLiteRegistration = nullptr;
+        if (TfLiteOpaqueContextGetNodeAndRegistration(
+                tfLiteContext, connectedIndex, &connectedNode, &tfLiteRegistration) != kTfLiteOk)
+        {
+            TF_LITE_OPAQUE_MAYBE_KERNEL_LOG(tfLiteContext,
+                                            "TfLiteArmnnOpaqueDelegate: Unable to get node and registration for node "
+                                            "%d.", connectedIndex);
+            continue;
+        }
+        int numOutputs = 0;
+        const int* outputTensors;
+
+        if (TfLiteOpaqueNodeOutputs(connectedNode, &outputTensors, &numOutputs) != kTfLiteOk)
+        {
+            TF_LITE_OPAQUE_MAYBE_KERNEL_LOG(
+                    tfLiteContext,
+                    "TfLiteArmnnOpaqueDelegate: Unable to gather output tensor indices from node #%d: ",
+                    connectedIndex);
+            continue;
+        }
+
+        for (int j = 0; j < numOutputs; ++j)
+        {
+            if (outputTensors[j] == inputIdx)
+            {
+                return connectedNode;
+            }
+        }
+    }
+    // No node found so set connectedIndex to -1
+    connectedIndex = -1;
+    return nullptr;
+}
+
+bool WillInputBeOptimizedToConst(TfLiteOpaqueContext* tfLiteContext, int32_t inputIdx)
+{
+    int32_t connectedIndex;
+    TfLiteOpaqueNode* connectedNode = GetNodeConnectedToInput(tfLiteContext, connectedIndex, inputIdx);
+
+    if (connectedNode)
+    {
+        TfLiteRegistrationExternal* tfLiteRegistration = nullptr;
+
+        if (TfLiteOpaqueContextGetNodeAndRegistration(tfLiteContext, connectedIndex, &connectedNode,
+                                                      &tfLiteRegistration) == kTfLiteOk)
+        {
+            switch (TfLiteRegistrationExternalGetBuiltInCode(tfLiteRegistration))
+            {
+                case kTfLiteBuiltinDequantize:
+                {
+                    auto numInputs = TfLiteOpaqueNodeNumberOfInputs(connectedNode);
+                    if (numInputs >= 1)
+                    {
+                        const int* inputTensors;
+                        if (TfLiteOpaqueNodeInputs(connectedNode, &inputTensors, &numInputs) != kTfLiteOk)
+                        {
+                            TF_LITE_OPAQUE_MAYBE_KERNEL_LOG(
+                                    tfLiteContext,
+                                    "TfLiteArmnnOpaqueDelegate: Unable to gather input tensor indices from node #%d: ",
+                                    connectedIndex);
+                            return false; // Cannot inspect the inputs, so do not assume it becomes constant
+                        }
+                        const TfLiteOpaqueTensor* tfLiteInputTensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext,
+                                                                                                         inputTensors[0]);
+
+                        // If the input to the Dequantize is a Constant then both that Constant layer and the Dequantize
+                        // layer will be replaced by a single Constant layer containing the dequantized values.
+                        if (IsConstantTensor(tfLiteInputTensor))
+                        {
+                            return true;
+                        }
+                    }
+                    break;
+                }
+                default:
+                {
+                }
+            }
+        }
+    }
+    return false;
+}
+
 } // namespace armnnDelegate
diff --git a/delegate/opaque/src/SharedFunctions.hpp b/delegate/opaque/src/SharedFunctions.hpp
index 72fbe6a332..4698a1e989 100644
--- a/delegate/opaque/src/SharedFunctions.hpp
+++ b/delegate/opaque/src/SharedFunctions.hpp
@@ -23,5 +23,11 @@ TfLiteStatus ValidateFusedActivationOperator(DelegateData& delegateData,
                                              const armnn::TensorInfo& outputInfo,
                                              TfLiteFusedActivation activationType);
 
+TfLiteOpaqueNode* GetNodeConnectedToInput(TfLiteOpaqueContext* tfLiteContext,
+                                          int32_t& connectedIndex,
+                                          int32_t inputIdx);
+
+bool WillInputBeOptimizedToConst(TfLiteOpaqueContext* tfLiteContext, int32_t inputIdx);
+
 } // namespace armnnOpaqueDelegate
-- 
cgit v1.2.1