From 42477c1d3e7ddf74863e84ab79dbe6f42e4a0ba3 Mon Sep 17 00:00:00 2001 From: Kevin May Date: Thu, 26 Mar 2020 13:34:14 +0000 Subject: IVGCVSW-4447 Add Hal 1_3 Support * Add new 1.3 files HalPolicy, ArmnnDriver, ArmnnDriverImpl * Add new .rc file for 1.3 service * Add ArmnnPreparedModel_1_3 and implement new functions * Update Android.mk with 1.3 driver and service * Refactor ifdef to include ARMNN_ANDROID_NN_V1_3 * Create Utils getMainModel for new 1.3 Model Main Subgraph * Use android Utils to convertToV1_X in ArmnnPrepapredModel_1_3 * Refactor HAL 1.2 convert functions into ConversionUtils_1_2.hpp * Replace ArmnnBurstExecutorWithCache with call to ExecutionBurstServer Signed-off-by: Kevin May Change-Id: I514069e9e1b16bcd1c4abfb5d563d25ac22d02e3 --- 1.0/HalPolicy.hpp | 1 + 1.1/HalPolicy.hpp | 1 + 1.2/HalPolicy.cpp | 2492 +------------------ 1.2/HalPolicy.hpp | 2 + 1.3/ArmnnDriver.hpp | 294 +++ 1.3/ArmnnDriverImpl.cpp | 338 +++ 1.3/ArmnnDriverImpl.hpp | 40 + 1.3/HalPolicy.cpp | 451 ++++ 1.3/HalPolicy.hpp | 150 ++ Android.mk | 186 ++ ArmnnDriver.hpp | 23 +- ArmnnDriverImpl.cpp | 28 +- ArmnnDriverImpl.hpp | 6 + ArmnnPreparedModel.cpp | 10 +- ArmnnPreparedModel_1_2.cpp | 128 +- ArmnnPreparedModel_1_3.cpp | 698 ++++++ ArmnnPreparedModel_1_3.hpp | 137 ++ ConversionUtils.hpp | 206 +- ConversionUtils_1_2.hpp | 2590 ++++++++++++++++++++ ModelToINetworkConverter.cpp | 36 +- RequestThread.cpp | 13 + Utils.cpp | 96 +- Utils.hpp | 74 +- ...id.hardware.neuralnetworks@1.3-service-armnn.rc | 4 + test/Convolution2D.hpp | 2 - 25 files changed, 5418 insertions(+), 2588 deletions(-) create mode 100644 1.3/ArmnnDriver.hpp create mode 100644 1.3/ArmnnDriverImpl.cpp create mode 100644 1.3/ArmnnDriverImpl.hpp create mode 100644 1.3/HalPolicy.cpp create mode 100644 1.3/HalPolicy.hpp create mode 100644 ArmnnPreparedModel_1_3.cpp create mode 100644 ArmnnPreparedModel_1_3.hpp create mode 100644 ConversionUtils_1_2.hpp create mode 100644 android.hardware.neuralnetworks@1.3-service-armnn.rc diff --git a/1.0/HalPolicy.hpp b/1.0/HalPolicy.hpp index 9eb13b47..25bc47ce 100644 --- a/1.0/HalPolicy.hpp +++ b/1.0/HalPolicy.hpp @@ -26,6 +26,7 @@ public: using Operation = V1_0::Operation; using OperationType = V1_0::OperationType; using getSupportedOperations_cb = V1_0::IDevice::getSupportedOperations_cb; + using ErrorStatus = V1_0::ErrorStatus; static bool ConvertOperation(const Operation& operation, const Model& model, ConversionData& data); diff --git a/1.1/HalPolicy.hpp b/1.1/HalPolicy.hpp index 806686bf..18bb705c 100644 --- a/1.1/HalPolicy.hpp +++ b/1.1/HalPolicy.hpp @@ -26,6 +26,7 @@ public: using Operation = V1_1::Operation; using OperationType = V1_1::OperationType; using getSupportedOperations_cb = V1_1::IDevice::getSupportedOperations_1_1_cb; + using ErrorStatus = V1_0::ErrorStatus; static bool ConvertOperation(const Operation& operation, const Model& model, ConversionData& data); diff --git a/1.2/HalPolicy.cpp b/1.2/HalPolicy.cpp index ca92318e..9e547fab 100644 --- a/1.2/HalPolicy.cpp +++ b/1.2/HalPolicy.cpp @@ -4,17 +4,6 @@ // #include "HalPolicy.hpp" -#include "Utils.hpp" - -#include - -#include -#include - -#include - -#include -#include namespace armnn_driver { @@ -26,58 +15,6 @@ using namespace armnn; namespace { -bool IsQSymmDequantizeForWeights(const HalPolicy::Operation& operation, const HalPolicy::Model& model) -{ - const HalPolicy::Operand* operand = GetInputOperand(operation, 0, model); - if (!operand) - { - return false; - } - - if(!IsQSymm8(*operand)) - { - // Only QSymm8 weights are dequantized on the 
fly by the driver - return false; - } - - if (!IsOperandConstant(*operand)) - { - // Non-const input is not accepted for weights - return false; - } - - // Iterate through all the operations and find the operation feeding from the Dequantize output - const size_t outputIndex = operation.outputs[0]; - for (uint32_t operationIdx = 0; operationIdx < model.operations.size(); ++operationIdx) - { - const auto& operationIt = model.operations[operationIdx]; - switch (operationIt.type) - { - case HalPolicy::OperationType::FULLY_CONNECTED: - if (outputIndex == operationIt.inputs[1]) // Weights are bound to slot 1 - { - // If the output is going into the FC weights return true - return true; - } - break; - case HalPolicy::OperationType::LSTM: - for (size_t k = 0; k < operationIt.inputs.size(); ++k) - { - if (outputIndex == operationIt.inputs[k]) - { - // If the output is going into the LSTM weights return true - return true; - } - } - break; - default: - break; - } - } - - return false; -} - } // anonymous namespace bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model, ConversionData& data) @@ -237,57 +174,7 @@ bool HalPolicy::ConvertComparison(const Operation& operation, ComparisonOperation comparisonOperation) { ALOGV("hal_1_2::HalPolicy::ConvertComparison()"); - ALOGV("comparisonOperation = %s", GetComparisonOperationAsCString(comparisonOperation)); - - LayerInputHandle input0 = ConvertToLayerInputHandle(operation, 0, model, data); - LayerInputHandle input1 = ConvertToLayerInputHandle(operation, 1, model, data); - - if (!(input0.IsValid() && input1.IsValid())) - { - return Fail("%s: Operation has invalid inputs", __func__); - } - - const Operand* output = GetOutputOperand(operation, 0, model); - if (!output) - { - return Fail("%s: Could not read output 0", __func__); - } - - const TensorInfo& inputInfo0 = input0.GetTensorInfo(); - const TensorInfo& inputInfo1 = input1.GetTensorInfo(); - const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); - - if (IsDynamicTensor(outputInfo)) - { - return Fail("%s: Dynamic output tensors are not supported", __func__); - } - - ComparisonDescriptor descriptor(comparisonOperation); - - bool isSupported = false; - FORWARD_LAYER_SUPPORT_FUNC(__func__, - IsComparisonSupported, - data.m_Backends, - isSupported, - inputInfo0, - inputInfo1, - outputInfo, - descriptor); - - if (!isSupported) - { - return false; - } - - IConnectableLayer* layer = data.m_Network->AddComparisonLayer(descriptor); - assert(layer != nullptr); - bool isReshapeSupported = BroadcastTensor(input0, input1, layer, data); - if (!isReshapeSupported) - { - return false; - } - - return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); + return ::ConvertComparison_1_2(operation, model, data, comparisonOperation); } bool HalPolicy::ConvertConcatenation(const Operation& operation, const Model& model, ConversionData& data) @@ -299,153 +186,7 @@ bool HalPolicy::ConvertConcatenation(const Operation& operation, const Model& mo bool HalPolicy::ConvertConv2d(const Operation& operation, const Model& model, ConversionData& data) { ALOGV("hal_1_2::HalPolicy::ConvertConv2d()"); - - LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); - if (!input.IsValid()) - { - return Fail("%s: Operation has invalid inputs", __func__); - } - - const Operand* output = GetOutputOperand(operation, 0, model); - if (!output) - { - return Fail("%s: Could not read output 0", __func__); - } - - const TensorInfo& inputInfo = input.GetTensorInfo(); - const 
TensorInfo& outputInfo = GetTensorInfoForOperand(*output); - - if (IsDynamicTensor(outputInfo)) - { - return Fail("%s: Dynamic output tensors are not supported", __func__); - } - - Convolution2dDescriptor desc; - desc.m_DataLayout = DataLayout::NHWC; - - // Determine whether padding is implicit or explicit - bool implicitPadding = operation.inputs.size() == 7 || - (operation.inputs.size() >= 8 && - GetInputOperand(operation, 7, model)->type == OperandType::BOOL); - - if (implicitPadding) - { - desc.m_DataLayout = OptionalDataLayout(operation, 7, model, data); - } - else if (operation.inputs.size() >= 10) - { - desc.m_DataLayout = OptionalDataLayout(operation, 10, model, data); - } - - const PermutationVector OHWIToOIHW = {0, 2, 3, 1}; - - // ArmNN does not currently support non-fixed weights or bias - // The NNAPI filter is always OHWI [depth_out, filter_height, filter_width, depth_in] but ArmNN expects the - // filter's height and width indices to match the input's height and width indices so we permute it to OIHW if - // the DataLayout is NCHW - const ConstTensorPin weightsPin = (desc.m_DataLayout == DataLayout::NCHW) ? - ConvertOperationInputToConstTensorPin(operation, 1, model, data, OHWIToOIHW) : - ConvertOperationInputToConstTensorPin(operation, 1, model, data); - const ConstTensorPin biasPin = - ConvertOperationInputToConstTensorPin(operation, 2, model, data); - - if (!weightsPin.IsValid()) - { - return Fail("%s: Operation has invalid weights", __func__); - } - - if (!biasPin.IsValid()) - { - return Fail("%s: Operation has invalid biases", __func__); - } - - ConstTensor weights = weightsPin.GetConstTensor(); - ConstTensor bias = biasPin.GetConstTensor(); - SanitizeBiasQuantizationScale(bias.GetInfo(), weights.GetInfo(), inputInfo); - - ActivationFn activation; - - if (implicitPadding) - { - android::nn::PaddingScheme paddingScheme; - if (!GetInputPaddingScheme(operation, 3, paddingScheme, model, data) || - !GetInputScalar(operation, 4, OperandType::INT32, desc.m_StrideX, model, data) || - !GetInputScalar(operation, 5, OperandType::INT32, desc.m_StrideY, model, data) || - !GetInputActivationFunction(operation, 6, activation, model, data) || - !GetOptionalConvolutionDilationParams(operation, 8, desc, model, data)) - { - return Fail("%s: Operation has invalid inputs (implicit padding)", __func__); - } - - armnnUtils::DataLayoutIndexed dataLayoutIndexed(desc.m_DataLayout); - unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex(); - unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex(); - const uint32_t kernelX = weights.GetShape()[widthIndex]; - const uint32_t kernelY = weights.GetShape()[heightIndex]; - const uint32_t inputX = inputInfo.GetShape()[widthIndex]; - const uint32_t inputY = inputInfo.GetShape()[heightIndex]; - - CalcPadding(inputX, kernelX, desc.m_StrideX, desc.m_DilationX, desc.m_PadLeft, desc.m_PadRight, paddingScheme); - CalcPadding(inputY, kernelY, desc.m_StrideY, desc.m_DilationY, desc.m_PadTop, desc.m_PadBottom, paddingScheme); - - } - else if (operation.inputs.size() >= 10) - { - // explicit padding - if (!GetInputScalar(operation, 3, OperandType::INT32, desc.m_PadLeft, model, data) || - !GetInputScalar(operation, 4, OperandType::INT32, desc.m_PadRight, model, data) || - !GetInputScalar(operation, 5, OperandType::INT32, desc.m_PadTop, model, data) || - !GetInputScalar(operation, 6, OperandType::INT32, desc.m_PadBottom, model, data) || - !GetInputScalar(operation, 7, OperandType::INT32, desc.m_StrideX, model, data) || - !GetInputScalar(operation, 8, 
OperandType::INT32, desc.m_StrideY, model, data) || - !GetInputActivationFunction(operation, 9, activation, model, data) || - !GetOptionalConvolutionDilationParams(operation, 11, desc, model, data)) - { - return Fail("%s: Operation has invalid inputs (explicit padding)", __func__); - } - } - else - { - return Fail("%s: Unsupported number of operation inputs", __func__); - } - - desc.m_BiasEnabled = true; - Optional biases(bias.GetInfo()); - - bool isSupported = false; - FORWARD_LAYER_SUPPORT_FUNC(__func__, - IsConvolution2dSupported, - data.m_Backends, - isSupported, - inputInfo, - outputInfo, - desc, - weights.GetInfo(), - biases); - - if (!isSupported) - { - return false; - } - - IConnectableLayer* startLayer = - data.m_Network->AddConvolution2dLayer(desc, weights, Optional(bias)); - - if (!startLayer) - { - return Fail("%s: AddConvolution2dLayer failed", __func__); - } - - IConnectableLayer* endLayer = ProcessActivation(outputInfo, activation, startLayer, data); - - if (!endLayer) - { - return Fail("%s: ProcessActivation failed", __func__); - } - - input.Connect(startLayer->GetInputSlot(0)); - - return SetupAndTrackLayerOutputSlot(operation, 0, *endLayer, model, data); + return ::ConvertConv2d_1_2(operation, model, data); } bool HalPolicy::ConvertDepthToSpace(const Operation& operation, const Model& model, ConversionData& data) @@ -457,187 +198,13 @@ bool HalPolicy::ConvertDepthToSpace(const Operation& operation, const Model& mod bool HalPolicy::ConvertDepthwiseConv2d(const Operation& operation, const Model& model, ConversionData& data) { ALOGV("hal_1_2::HalPolicy::ConvertDepthwiseConv2d()"); - - LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); - - if (!input.IsValid()) - { - return Fail("%s: Operation has invalid inputs", __func__); - } - - const Operand* output = GetOutputOperand(operation, 0, model); - - if (!output) - { - return Fail("%s: Could not read output 0", __func__); - } - - const TensorInfo& inputInfo = input.GetTensorInfo(); - const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); - - if (IsDynamicTensor(outputInfo)) - { - return Fail("%s: Dynamic output tensors are not supported", __func__); - } - - // ArmNN does not currently support non-fixed weights or bias - // Find the shape of the weights tensor. In AndroidNN this will be [ 1, H, W, I * M ] - const Operand* weightsOperand = GetInputOperand(operation, 1, model); - - if (weightsOperand == nullptr) - { - return Fail("%s: Operand is invalid", __func__); - } - if ( weightsOperand->dimensions[0] != 1) - { - return Fail("%s: Invalid weights; for depthwise convolution, dimension 0 must be 1 but it is %i", - __func__, weightsOperand->dimensions[0] ); - } - - DepthwiseConvolution2dDescriptor desc; - desc.m_DataLayout = DataLayout::NHWC; - - // Determine whether padding is implicit or explicit - bool implicitPadding = operation.inputs.size() == 8 || - (operation.inputs.size() >= 9 && - GetInputOperand(operation, 8, model)->type == OperandType::BOOL); - - // Look ahead to find the optional DataLayout, if present - const uint32_t dataLayoutFlagIndex = implicitPadding ? 
8 : 11; - desc.m_DataLayout = OptionalDataLayout(operation, dataLayoutFlagIndex, model, data); - - armnnUtils::DataLayoutIndexed dataLayoutIndexed(desc.m_DataLayout); - unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex(); - unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex(); - unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex(); - - // Reinterpret weight data as [ H, W, I, M ] - TensorShape weightsShape({ weightsOperand->dimensions[1], - weightsOperand->dimensions[2], - inputInfo.GetShape()[channelsIndex], - weightsOperand->dimensions[3] / inputInfo.GetShape()[channelsIndex] }); - - // Swizzle weight data [ H, W, I, M ] -> [ M, I, H, W ] - const PermutationVector HWIMToMIHW = { 2U, 3U, 1U, 0U }; - - const ConstTensorPin weightsPin = - ConvertOperationInputToConstTensorPin(operation, - 1, - model, - data, - HWIMToMIHW, - &weightsShape); - - // Bias is a 1D tensor - const ConstTensorPin biasPin = - ConvertOperationInputToConstTensorPin(operation, 2, model, data); - - if (!weightsPin.IsValid()) - { - return Fail("%s: Operation has invalid weights", __func__); - } - - if (!biasPin.IsValid()) - { - return Fail("%s: Operation has invalid biases", __func__); - } - - ConstTensor weights = weightsPin.GetConstTensor(); - ConstTensor bias = biasPin.GetConstTensor(); - SanitizeBiasQuantizationScale(bias.GetInfo(), weights.GetInfo(), inputInfo); - - ActivationFn activation; - - if (implicitPadding) - { - android::nn::PaddingScheme paddingScheme; - if (!GetInputPaddingScheme(operation, 3, paddingScheme, model, data) || - !GetInputScalar(operation, 4, OperandType::INT32, desc.m_StrideX, model, data) || - !GetInputScalar(operation, 5, OperandType::INT32, desc.m_StrideY, model, data) || - !GetInputActivationFunction(operation, 7, activation, model, data) || - !GetOptionalConvolutionDilationParams(operation, 9, desc, model, data)) - { - return Fail("%s: Operation has invalid inputs (implicit padding)", __func__); - } - - const uint32_t kernelX = weights.GetShape()[3]; - const uint32_t kernelY = weights.GetShape()[2]; - const uint32_t inputX = inputInfo.GetShape()[widthIndex]; - const uint32_t inputY = inputInfo.GetShape()[heightIndex]; - - CalcPadding(inputX, kernelX, desc.m_StrideX, desc.m_DilationX, desc.m_PadLeft, desc.m_PadRight, paddingScheme); - CalcPadding(inputY, kernelY, desc.m_StrideY, desc.m_DilationY, desc.m_PadTop, desc.m_PadBottom, paddingScheme); - } - else if (operation.inputs.size() >= 11) - { - // explicit padding - if (!GetInputScalar(operation, 3, OperandType::INT32, desc.m_PadLeft, model, data) || - !GetInputScalar(operation, 4, OperandType::INT32, desc.m_PadRight, model, data) || - !GetInputScalar(operation, 5, OperandType::INT32, desc.m_PadTop, model, data) || - !GetInputScalar(operation, 6, OperandType::INT32, desc.m_PadBottom, model, data) || - !GetInputScalar(operation, 7, OperandType::INT32, desc.m_StrideX, model, data) || - !GetInputScalar(operation, 8, OperandType::INT32, desc.m_StrideY, model, data) || - !GetInputActivationFunction(operation, 10, activation, model, data) || - !GetOptionalConvolutionDilationParams(operation, 12, desc, model, data)) - { - return Fail("%s: Operation has invalid inputs (explicit padding)", __func__); - } - } - else - { - return Fail("%s: Unsupported number of operation inputs", __func__); - } - - desc.m_BiasEnabled = true; - Optional biases(bias.GetInfo()); - - bool isSupported = false; - FORWARD_LAYER_SUPPORT_FUNC(__func__, - IsDepthwiseConvolutionSupported, - data.m_Backends, - isSupported, - inputInfo, - 
outputInfo, - desc, - weights.GetInfo(), - biases); - - if (!isSupported) - { - return false; - } - - IConnectableLayer* startLayer = - data.m_Network->AddDepthwiseConvolution2dLayer(desc, weights, Optional(bias)); - - if (!startLayer) - { - return Fail("%s: AddDepthwiseConvolution2dLayer failed", __func__); - } - - IConnectableLayer* endLayer = ProcessActivation(outputInfo, activation, startLayer, data); - if (!endLayer) - { - return Fail("%s: ProcessActivation failed", __func__); - } - - input.Connect(startLayer->GetInputSlot(0)); - - return SetupAndTrackLayerOutputSlot(operation, 0, *endLayer, model, data); + return ::ConvertDepthwiseConv2d_1_2(operation, model, data); } bool HalPolicy::ConvertDequantize(const Operation& operation, const Model& model, ConversionData& data) { ALOGV("hal_1_2::HalPolicy::ConvertDequantize()"); - - if (IsQSymmDequantizeForWeights(operation, model)) - { - // NOTE: QSymm8 weights are dequantized internally by the driver, - // therefore this type of Dequantize is implicitly supported - return true; - } - - return ::ConvertDequantize(operation, model, data); + return ::ConvertDequantize_1_2(operation, model, data); } bool HalPolicy::ConvertDiv(const Operation& operation, const Model& model, ConversionData& data) @@ -652,120 +219,13 @@ bool HalPolicy::ConvertElementwiseUnary(const Operation& operation, UnaryOperation unaryOperation) { ALOGV("hal_1_2::HalPolicy::ConvertElementwiseUnary()"); - ALOGV("unaryOperation = %s", GetUnaryOperationAsCString(unaryOperation)); - - LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); - - if (!input.IsValid()) - { - return Fail("%s: Operation has invalid input", __func__); - } - - const Operand* output = GetOutputOperand(operation, 0, model); - if (!output) - { - return Fail("%s: Could not read output 0", __func__); - } - - const TensorInfo& inputInfo = input.GetTensorInfo(); - const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); - - if (IsDynamicTensor(outputInfo)) - { - return Fail("%s: Dynamic output tensors are not supported", __func__); - } - - ElementwiseUnaryDescriptor descriptor(unaryOperation); - - bool isSupported = false; - FORWARD_LAYER_SUPPORT_FUNC(__func__, - IsElementwiseUnarySupported, - data.m_Backends, - isSupported, - inputInfo, - outputInfo, - descriptor); - - if (!isSupported) - { - return false; - } - - IConnectableLayer* layer = data.m_Network->AddElementwiseUnaryLayer(descriptor); - assert(layer != nullptr); - - input.Connect(layer->GetInputSlot(0)); - - return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); + return ::ConvertElementwiseUnary(operation, model, data, unaryOperation); } bool HalPolicy::ConvertExpandDims(const Operation& operation, const Model& model, ConversionData& data) { ALOGV("hal_1_2::HalPolicy::ConvertExpandDims()"); - - LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); - - if (!input.IsValid()) - { - return Fail("%s: Operation has invalid input", __func__); - } - - const Operand* output = GetOutputOperand(operation, 0, model); - if (!output) - { - return Fail("%s: Operation has invalid output", __func__); - } - - const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); - if (IsDynamicTensor(outputInfo)) - { - return Fail("%s: Dynamic output tensors are not supported", __func__); - } - - int32_t axis; - if (!GetInputScalar(operation, 1, OperandType::INT32, axis, model, data)) - { - return Fail("%s: failed to get axis input value", __func__); - } - - TensorShape targetShape; - - try - { - 
targetShape = armnnUtils::ExpandDims(input.GetTensorInfo().GetShape(), axis); - } - catch (const std::exception &e) - { - return Fail("%s: %s", __func__, e.what()); - } - - if (targetShape != outputInfo.GetShape()) - { - return Fail("%s: Shape of the output operand does not match the resolved expanded shape", __func__); - } - - ReshapeDescriptor reshapeDescriptor; - reshapeDescriptor.m_TargetShape = targetShape; - - bool isSupported = false; - FORWARD_LAYER_SUPPORT_FUNC(__func__, - IsReshapeSupported, - data.m_Backends, - isSupported, - input.GetTensorInfo(), - outputInfo, - reshapeDescriptor); - - if (!isSupported) - { - return false; - } - - IConnectableLayer* layer = data.m_Network->AddReshapeLayer(reshapeDescriptor); - assert(layer != nullptr); - input.Connect(layer->GetInputSlot(0)); - - return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); + return ::ConvertExpandDims(operation, model, data); } bool HalPolicy::ConvertFloor(const Operation& operation, const Model& model, ConversionData& data) @@ -783,582 +243,58 @@ bool HalPolicy::ConvertFullyConnected(const Operation& operation, const Model& m bool HalPolicy::ConvertGroupedConv2d(const Operation& operation, const Model& model, ConversionData& data) { ALOGV("hal_1_2::HalPolicy::ConvertGroupedConv2d()"); + return ::ConvertGroupedConv2d(operation, model, data); +} - // - // Parse data - // - LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); - if (!input.IsValid()) - { - return Fail("%s: Operation has invalid inputs", __func__); - } - const TensorInfo& inputInfo = input.GetTensorInfo(); - - const Operand* output = GetOutputOperand(operation, 0, model); - if (!output) - { - return Fail("%s: Could not read output 0", __func__); - } - const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); - if (IsDynamicTensor(outputInfo)) - { - return Fail("%s: Dynamic output tensors are not supported", __func__); - } - - // Look ahead to determine data layout - DataLayout dataLayout = DataLayout::NHWC; - if (operation.inputs.size() == 12) - { - dataLayout = OptionalDataLayout(operation, 11, model, data); - } - else - { - dataLayout = OptionalDataLayout(operation, 8, model, data); - } +bool HalPolicy::ConvertInstanceNormalization(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_2::HalPolicy::ConvertInstanceNormalization()"); + return ::ConvertInstanceNormalization(operation, model, data); +} - // NOTE: - // NNAPI weights are always OHWI, i.e. [depth_out, filter_height, filter_width, depth_group], - // but Arm NN expects the filter's height and width indices to match the input's height and - // width indices so when the DataLayout is NCHW, we need to permute the weights to OIHW - const PermutationVector ohwiToOihw = { 0u, 2u, 3u, 1u }; - const ConstTensorPin weightsPin = (dataLayout == DataLayout::NCHW) ? 
- ConvertOperationInputToConstTensorPin(operation, 1, model, data, ohwiToOihw) : - ConvertOperationInputToConstTensorPin(operation, 1, model, data); - const ConstTensorPin biasesPin = - ConvertOperationInputToConstTensorPin(operation, 2, model, data); - if (!weightsPin.IsValid() || !biasesPin.IsValid()) - { - return Fail("%s: Operation has invalid inputs", __func__); - } +bool HalPolicy::ConvertL2Normalization(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_2::HalPolicy::ConvertL2Normalization()"); + return ::ConvertL2Normalization(operation, model, data); +} - ConstTensor weights = weightsPin.GetConstTensor(); - ConstTensor biases = biasesPin.GetConstTensor(); - SanitizeBiasQuantizationScale(biases.GetInfo(), weights.GetInfo(), inputInfo); +bool HalPolicy::ConvertL2Pool2d(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_2::HalPolicy::ConvertL2Pool2d()"); + return ConvertPooling2d(operation, __func__, PoolingAlgorithm::L2, model, data); +} - const TensorShape& inputShape = inputInfo.GetShape(); - const TensorShape& outputShape = outputInfo.GetShape(); - const TensorShape& weightsShape = weights.GetShape(); - const TensorShape& biasesShape = biases.GetShape(); +bool HalPolicy::ConvertLocalResponseNormalization(const Operation& operation, + const Model& model, + ConversionData& data) +{ + ALOGV("hal_1_2::HalPolicy::ConvertLocalResponseNormalization()"); + return ::ConvertLocalResponseNormalization(operation, model, data); +} - armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout); - const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex(); - const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex(); - const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex(); +bool HalPolicy::ConvertLogistic(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_2::HalPolicy::ConvertLogistic()"); + return ::ConvertLogistic(operation, model, data); +} - Convolution2dDescriptor desc; - desc.m_DataLayout = dataLayout; - desc.m_BiasEnabled = true; +bool HalPolicy::ConvertLogSoftmax(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_2::HalPolicy::ConvertLogSoftmax()"); + return ::ConvertLogSoftmax(operation, model, data); +} - int numGroups; - ActivationFn activation; +bool HalPolicy::ConvertMaxPool2d(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_2::HalPolicy::ConvertMaxPool2d()"); + return ConvertPooling2d(operation, __func__, PoolingAlgorithm::Max, model, data); +} - if (operation.inputs.size() == 12) - { - if (!GetInputScalar(operation, 3, OperandType::INT32, desc.m_PadLeft, model, data) || - !GetInputScalar(operation, 4, OperandType::INT32, desc.m_PadRight, model, data) || - !GetInputScalar(operation, 5, OperandType::INT32, desc.m_PadTop, model, data) || - !GetInputScalar(operation, 6, OperandType::INT32, desc.m_PadBottom, model, data) || - !GetInputScalar(operation, 7, OperandType::INT32, desc.m_StrideX, model, data) || - !GetInputScalar(operation, 8, OperandType::INT32, desc.m_StrideY, model, data) || - !GetInputScalar(operation, 9, OperandType::INT32, numGroups, model, data) || - !GetInputActivationFunction(operation, 10, activation, model, data)) - { - return Fail("%s: Operation has invalid inputs (explicit padding)", __func__); - } - - } - else if (operation.inputs.size() == 9) - { - android::nn::PaddingScheme paddingScheme; - if (!GetInputPaddingScheme(operation, 3, 
paddingScheme, model, data) || - !GetInputScalar(operation, 4, OperandType::INT32, desc.m_StrideX, model, data) || - !GetInputScalar(operation, 5, OperandType::INT32, desc.m_StrideY, model, data) || - !GetInputScalar(operation, 6, OperandType::INT32, numGroups, model, data) || - !GetInputActivationFunction(operation, 7, activation, model, data)) - { - return Fail("%s: Operation has invalid inputs (implicit padding)", __func__); - } - - const uint32_t inputX = inputInfo.GetShape()[widthIndex]; - const uint32_t inputY = inputInfo.GetShape()[heightIndex]; - - const uint32_t kernelX = weightsShape[widthIndex]; - const uint32_t kernelY = weightsShape[heightIndex]; - - CalcPadding(inputX, kernelX, desc.m_StrideX, desc.m_PadLeft, desc.m_PadRight, paddingScheme); - CalcPadding(inputY, kernelY, desc.m_StrideY, desc.m_PadTop, desc.m_PadBottom, paddingScheme); - } - else - { - return Fail("%s: Unsupported number of operation inputs", __func__); - } - - const unsigned int outputChannels = outputShape[channelsIndex]; - - const unsigned int channelsPerGroup = weightsShape[channelsIndex]; - const unsigned int channelMultiplier = outputChannels / numGroups; - - // - // Validate all relevant inputs - // - if (numGroups <= 0) - { - return Fail("%s: Number of groups must be greater than 0. Got: %d", __func__, numGroups); - } - - if (outputChannels % numGroups != 0u) - { - return Fail("%s: Output channels must be divisible by the number of groups", __func__); - } - - // - // Set up Splitter layer - // - unsigned int splitterDimSizes[4] = { inputShape[0], inputShape[1], inputShape[2], inputShape[3] }; - splitterDimSizes[channelsIndex] /= numGroups; // split in depth - - TensorInfo splitterOutputInfo(4, - splitterDimSizes, - inputInfo.GetDataType(), - inputInfo.GetQuantizationScale(), - inputInfo.GetQuantizationOffset()); - - std::vector> splitterOutputInfos(numGroups, std::ref(splitterOutputInfo)); - - ViewsDescriptor splitterDesc(numGroups); - for (unsigned int group = 0u; group < numGroups; ++group) - { - splitterDesc.SetViewOriginCoord(group, channelsIndex, splitterDimSizes[channelsIndex] * group); - for (unsigned int dimIdx = 0u; dimIdx < 4u; dimIdx++) - { - splitterDesc.SetViewSize(group, dimIdx, splitterDimSizes[dimIdx]); - } - } - - bool isSupported = false; - FORWARD_LAYER_SUPPORT_FUNC(__func__, - IsSplitterSupported, - data.m_Backends, - isSupported, - inputInfo, - splitterOutputInfos, - splitterDesc); - if (!isSupported) - { - return false; - } - - IConnectableLayer* splitterLayer = data.m_Network->AddSplitterLayer(splitterDesc); - if (!splitterLayer) - { - return Fail("%s: Failed to add SplitterLayer", __func__); - } - - input.Connect(splitterLayer->GetInputSlot(0)); - for (unsigned int group = 0u; group < splitterLayer->GetNumOutputSlots(); ++group) - { - splitterLayer->GetOutputSlot(group).SetTensorInfo(splitterOutputInfo); - } - - // - // Set up Convolution2d layers for each group - // - - // Set up group tensor shapes - TensorShape groupInputShape(inputShape); - groupInputShape[channelsIndex] = channelsPerGroup; - - TensorShape groupOutputShape(outputShape); - groupOutputShape[channelsIndex] = 1; - - TensorShape groupWeightsShape(weightsShape); - groupWeightsShape[0] /= channelMultiplier * numGroups; - - TensorShape groupBiasesShape({ 1 }); - - // Set up group tensor infos - TensorInfo groupInputInfo(inputInfo); - groupInputInfo.SetShape(groupInputShape); - - const TensorInfo& weightsInfo = weights.GetInfo(); - TensorInfo groupWeightsInfo(weightsInfo); - 
groupWeightsInfo.SetShape(groupWeightsShape); - - const TensorInfo& biasesInfo = biases.GetInfo(); - TensorInfo groupBiasesInfo(biasesInfo); - groupBiasesInfo.SetShape(groupBiasesShape); - - TensorInfo groupOutputInfo(outputInfo); - groupOutputInfo.SetShape(groupOutputShape); - - const unsigned int weightsDataTypeSize = GetDataTypeSize(groupWeightsInfo.GetDataType()); - const unsigned int biasesDataTypeSize = GetDataTypeSize(groupBiasesInfo.GetDataType()); - - std::vector convLayers(numGroups * channelMultiplier, nullptr); - for (unsigned int group = 0u; group < numGroups; ++group) - { - for (unsigned int m = 0u; m < channelMultiplier; ++m) - { - auto index = group * channelMultiplier + m; - - const unsigned int weightsDataOffset = groupWeightsShape.GetNumElements() * index * weightsDataTypeSize; - const unsigned int biasesDataOffset = groupBiasesShape.GetNumElements() * index * biasesDataTypeSize; - - if (weightsInfo.HasPerAxisQuantization()) - { - // Extract per-axis quantization scales for group weights - const std::vector& weightsQuantScales = weightsInfo.GetQuantizationScales(); - groupWeightsInfo.SetQuantizationScales( - std::vector(weightsQuantScales.begin() + index, - weightsQuantScales.begin() + index + groupWeightsShape[0])); - - // Extract per-axis quantization scales for group biases - const std::vector& biasesQuantScales = biasesInfo.GetQuantizationScales(); - groupBiasesInfo.SetQuantizationScales( - std::vector(biasesQuantScales.begin() + index, - biasesQuantScales.begin() + index + groupWeightsShape[0])); - } - - // Extract weights and biases data for current group convolution - ConstTensor groupWeights(groupWeightsInfo, - static_cast(reinterpret_cast(weights.GetMemoryArea()) + - weightsDataOffset)); - ConstTensor groupBiases(groupBiasesInfo, - static_cast(reinterpret_cast(biases.GetMemoryArea()) + - biasesDataOffset)); - - isSupported = false; - FORWARD_LAYER_SUPPORT_FUNC(__func__, - IsConvolution2dSupported, - data.m_Backends, - isSupported, - groupInputInfo, - groupOutputInfo, - desc, - groupWeightsInfo, - Optional(groupBiasesInfo)); - if (!isSupported) - { - return false; - } - - IConnectableLayer *convLayer = - data.m_Network->AddConvolution2dLayer(desc, groupWeights, Optional(groupBiases)); - if (!convLayer) - { - return Fail("%s: AddConvolution2dLayer failed", __func__); - } - - splitterLayer->GetOutputSlot(group).Connect(convLayer->GetInputSlot(0)); - convLayer->GetOutputSlot(0).SetTensorInfo(groupOutputInfo); - - convLayers[index] = convLayer; - } - } - - // - // Set up Concat layer - // - ConcatDescriptor concatDescriptor(outputInfo.GetShape()[channelsIndex]); - for (unsigned int group = 0u; group < numGroups; ++group) - { - for (unsigned int m = 0u; m < channelMultiplier; ++m) - { - auto index = group * channelMultiplier + m; - concatDescriptor.SetViewOriginCoord(index, channelsIndex, index); - concatDescriptor.SetConcatAxis(channelsIndex); - } - } - - isSupported = false; - FORWARD_LAYER_SUPPORT_FUNC(__func__, - IsConcatSupported, - data.m_Backends, - isSupported, - std::vector(numGroups * channelMultiplier, &groupOutputInfo), - outputInfo, - concatDescriptor); - if (!isSupported) - { - return false; - } - - IConnectableLayer* concatLayer = data.m_Network->AddConcatLayer(concatDescriptor); - if (!concatLayer) - { - return Fail("%s: AddConcatLayer failed", __func__); - } - - for (unsigned int group = 0u; group < numGroups; ++group) - { - for (unsigned int m = 0u; m < channelMultiplier; ++m) - { - auto index = group * channelMultiplier + m; - 
convLayers[index]->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(index)); - } - } - concatLayer->GetOutputSlot(0).SetTensorInfo(outputInfo); - - // - // Set up Activation layer (if it is set) - // - IConnectableLayer* endLayer = ProcessActivation(outputInfo, activation, concatLayer, data); - if (!endLayer) - { - return Fail("%s: ProcessActivation failed", __func__); - } - - return SetupAndTrackLayerOutputSlot(operation, 0, *endLayer, model, data); -} - -bool HalPolicy::ConvertInstanceNormalization(const Operation& operation, const Model& model, ConversionData& data) -{ - ALOGV("hal_1_2::HalPolicy::ConvertInstanceNormalization()"); - - LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); - if (!input.IsValid()) - { - return Fail("%s: Operation has an invalid input 0", __func__); - } - - const Operand* output = GetOutputOperand(operation, 0, model); - if (!output) - { - return Fail("%s: Operation has an invalid output", __func__); - } - - const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); - if (IsDynamicTensor(outputInfo)) - { - return Fail("%s: Dynamic output tensors are not supported", __func__); - } - - // Determine data type of input tensor - OperandType inputType; - if (!GetOperandType(operation, 0, model, inputType)) - { - return Fail("%s: Operation has invalid inputs", __func__); - } - - InstanceNormalizationDescriptor desc; - - // Read gamma, beta & epsilon - if (inputType == OperandType::TENSOR_FLOAT16) - { - Half fp16Gamma; - Half fp16Beta; - Half fp16Epsilon; - - if (!GetInputScalar(operation, 1, OperandType::FLOAT16, fp16Gamma, model, data) || - !GetInputScalar(operation, 2, OperandType::FLOAT16, fp16Beta, model, data) || - !GetInputScalar(operation, 3, OperandType::FLOAT16, fp16Epsilon, model, data)) - { - return Fail("%s: Operation has invalid inputs (FLOAT16)", __func__); - } - - desc.m_Gamma = static_cast(fp16Gamma); - desc.m_Beta = static_cast(fp16Beta); - desc.m_Eps = static_cast(fp16Epsilon); - } - else if (inputType == OperandType::TENSOR_FLOAT32) - { - if (!GetInputScalar(operation, 1, OperandType::FLOAT32, desc.m_Gamma, model, data) || - !GetInputScalar(operation, 2, OperandType::FLOAT32, desc.m_Beta, model, data) || - !GetInputScalar(operation, 3, OperandType::FLOAT32, desc.m_Eps, model, data)) - { - return Fail("%s: Operation has invalid inputs (FLOAT32)", __func__); - } - } - else - { - return Fail("%s: Unsupported input tensor type: %d", __func__, inputType); - } - - desc.m_DataLayout = OptionalDataLayout(operation, 4, model, data); - - bool isSupported = false; - FORWARD_LAYER_SUPPORT_FUNC(__func__, - IsInstanceNormalizationSupported, - data.m_Backends, - isSupported, - input.GetTensorInfo(), - outputInfo, - desc); - if (!isSupported) - { - return false; - } - - IConnectableLayer* layer = data.m_Network->AddInstanceNormalizationLayer(desc); - input.Connect(layer->GetInputSlot(0)); - - return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); -} - -bool HalPolicy::ConvertL2Normalization(const Operation& operation, const Model& model, ConversionData& data) -{ - ALOGV("hal_1_2::HalPolicy::ConvertL2Normalization()"); - return ::ConvertL2Normalization(operation, model, data); -} - -bool HalPolicy::ConvertL2Pool2d(const Operation& operation, const Model& model, ConversionData& data) -{ - ALOGV("hal_1_2::HalPolicy::ConvertL2Pool2d()"); - return ConvertPooling2d(operation, __func__, PoolingAlgorithm::L2, model, data); -} - -bool HalPolicy::ConvertLocalResponseNormalization(const Operation& operation, - const Model& 
model, - ConversionData& data) -{ - ALOGV("hal_1_2::HalPolicy::ConvertLocalResponseNormalization()"); - return ::ConvertLocalResponseNormalization(operation, model, data); -} - -bool HalPolicy::ConvertLogistic(const Operation& operation, const Model& model, ConversionData& data) -{ - ALOGV("hal_1_2::HalPolicy::ConvertLogistic()"); - return ::ConvertLogistic(operation, model, data); -} - -bool HalPolicy::ConvertLogSoftmax(const Operation& operation, const Model& model, ConversionData& data) -{ - ALOGV("hal_1_2::HalPolicy::ConvertLogSoftmax()"); - - LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); - if (!input.IsValid()) - { - return Fail("%s: Failed to read input 0", __func__); - } - - const Operand* output = GetOutputOperand(operation, 0, model); - if (!output) - { - return Fail("%s: Failed to read output", __func__); - } - - const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); - if (IsDynamicTensor(outputInfo)) - { - return Fail("%s: Dynamic output tensors are not supported", __func__); - } - - // Determine data type of input tensor - OperandType inputType; - if (!GetOperandType(operation, 0, model, inputType)) - { - return Fail("%s: Operation has invalid inputs", __func__); - } - - LogSoftmaxDescriptor descriptor; - - // Read beta - if (inputType == OperandType::TENSOR_FLOAT16) - { - Half fp16Beta; - if (!GetInputScalar(operation, 1, OperandType::FLOAT16, fp16Beta, model, data)) - { - return Fail("%s: Failed to read input 1 (FLOAT16)", __func__); - } - - descriptor.m_Beta = static_cast(fp16Beta); - } - else if (inputType == OperandType::TENSOR_FLOAT32) - { - if (!GetInputScalar(operation, 1, OperandType::FLOAT32, descriptor.m_Beta, model, data)) - { - return Fail("%s: Failed to read input 1 (FLOAT32)", __func__); - } - } - else - { - return Fail("%s: Unsupported input tensor type: %d", __func__, inputType); - } - - // Read axis - if (!GetInputInt32(operation, 2, descriptor.m_Axis, model, data)) - { - return Fail("%s: Failed to read input 2", __func__); - } - - bool isSupported = false; - FORWARD_LAYER_SUPPORT_FUNC(__func__, - IsLogSoftmaxSupported, - data.m_Backends, - isSupported, - input.GetTensorInfo(), - outputInfo, - descriptor); - if (!isSupported) - { - return false; - } - - IConnectableLayer* layer = data.m_Network->AddLogSoftmaxLayer(descriptor); - if (!layer) - { - return Fail("%s: AddLogSoftmaxLayer() returned nullptr", __func__); - } - - input.Connect(layer->GetInputSlot(0)); - - return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); -} - -bool HalPolicy::ConvertMaxPool2d(const Operation& operation, const Model& model, ConversionData& data) -{ - ALOGV("hal_1_2::HalPolicy::ConvertMaxPool2d()"); - return ConvertPooling2d(operation, __func__, PoolingAlgorithm::Max, model, data); -} - -bool HalPolicy::ConvertMaximum(const Operation& operation, const Model& model, ConversionData& data) -{ - ALOGV("hal_1_2::HalPolicy::ConvertMaximum()"); - - LayerInputHandle input0 = ConvertToLayerInputHandle(operation, 0, model, data); - LayerInputHandle input1 = ConvertToLayerInputHandle(operation, 1, model, data); - - if (!input0.IsValid() || !input1.IsValid()) - { - return Fail("%s: Operation has invalid inputs", __func__); - } - - const Operand* outputOperand = GetOutputOperand(operation, 0, model); - if (!outputOperand) - { - return Fail("%s: Could not read output", __func__); - } - - const TensorInfo& outInfo = GetTensorInfoForOperand(*outputOperand); - if (IsDynamicTensor(outInfo)) - { - return Fail("%s: Dynamic output tensors are 
not supported", __func__); - } - - bool isSupported = false; - FORWARD_LAYER_SUPPORT_FUNC(__func__, - IsMaximumSupported, - data.m_Backends, - isSupported, - input0.GetTensorInfo(), - input1.GetTensorInfo(), - outInfo); - - if (!isSupported) - { - return false; - } - - IConnectableLayer* layer = data.m_Network->AddMaximumLayer(); - assert(layer != nullptr); - bool isReshapeSupported = BroadcastTensor(input0, input1, layer, data); - if (!isReshapeSupported) - { - return false; - } - - return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); -} +bool HalPolicy::ConvertMaximum(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_2::HalPolicy::ConvertMaximum()"); + return ::ConvertMaximum(operation, model, data); +} bool HalPolicy::ConvertMean(const Operation& operation, const Model& model, ConversionData& data) { @@ -1369,50 +305,7 @@ bool HalPolicy::ConvertMean(const Operation& operation, const Model& model, Conv bool HalPolicy::ConvertMinimum(const Operation& operation, const Model& model, ConversionData& data) { ALOGV("hal_1_2::HalPolicy::ConvertMinimum()"); - - LayerInputHandle input0 = ConvertToLayerInputHandle(operation, 0, model, data); - LayerInputHandle input1 = ConvertToLayerInputHandle(operation, 1, model, data); - - if (!input0.IsValid() || !input1.IsValid()) - { - return Fail("%s: Operation has invalid inputs", __func__); - } - - const Operand* output = GetOutputOperand(operation, 0, model); - if (!output) - { - return Fail("%s: Could not read output 0", __func__); - } - - const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); - if (IsDynamicTensor(outputInfo)) - { - return Fail("%s: Dynamic output tensors are not supported", __func__); - } - - bool isSupported = false; - FORWARD_LAYER_SUPPORT_FUNC(__func__, - IsMinimumSupported, - data.m_Backends, - isSupported, - input0.GetTensorInfo(), - input1.GetTensorInfo(), - outputInfo); - - if (!isSupported) - { - return false; - } - - IConnectableLayer* const layer = data.m_Network->AddMinimumLayer(); - assert(layer != nullptr); - bool isReshapeSupported = BroadcastTensor(input0, input1, layer, data); - if (!isReshapeSupported) - { - return false; - } - - return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); + return ::ConvertMinimum(operation, model, data); } bool HalPolicy::ConvertMul(const Operation& operation, const Model& model, ConversionData& data) @@ -1421,410 +314,34 @@ bool HalPolicy::ConvertMul(const Operation& operation, const Model& model, Conve return ::ConvertMul(operation, model, data); } -bool HalPolicy::ConvertPad(const Operation& operation, const Model& model, ConversionData& data) -{ - ALOGV("hal_1_2::HalPolicy::ConvertPad()"); - return ::ConvertPad(operation, model, data); -} - -bool HalPolicy::ConvertPadV2(const Operation& operation, const Model& model, ConversionData& data) -{ - ALOGV("hal_1_2::HalPolicy::ConvertPadV2()"); - - LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); - if (!input.IsValid()) - { - return Fail("%s: Could not read input 0", __func__); - } - - const Operand* output = GetOutputOperand(operation, 0, model); - if (!output) - { - return Fail("%s: Could not read output", __func__); - } - - const TensorInfo& inputInfo = input.GetTensorInfo(); - unsigned int rank = inputInfo.GetNumDimensions(); - - PadDescriptor descriptor; - if (!ConvertPaddings(operation, model, data, rank, descriptor)) - { - return Fail("%s: Could not convert paddings", __func__); - } - - const TensorInfo& outputInfo = 
GetTensorInfoForOperand(*output); - if (IsDynamicTensor(outputInfo)) - { - return Fail("%s: Dynamic output tensors are not supported", __func__); - } - - // Determine type of padding value - OperandType operandType0; - OperandType operandType2; - - if (!GetOperandType(operation, 0, model, operandType0) || - !GetOperandType(operation, 2, model, operandType2)) - { - return Fail("%s: Operation has invalid inputs", __func__); - } - - // Read value to use for padding - if (operandType0 == OperandType::TENSOR_FLOAT16 && operandType2 == OperandType::FLOAT16) - { - Half f16PadValue; - if (!GetInputScalar(operation, 2, operandType2, f16PadValue, model, data)) - { - return Fail("%s: Could not read input 2 (FLOAT16)", __func__); - } - - descriptor.m_PadValue = f16PadValue; - } - else if (operandType0 == OperandType::TENSOR_FLOAT32 && operandType2 == OperandType::FLOAT32) - { - if (!GetInputFloat32(operation, 2, descriptor.m_PadValue, model, data)) - { - return Fail("%s: Could not read input 2 (FLOAT32)", __func__); - } - } - else if (operandType0 == OperandType::TENSOR_QUANT8_ASYMM && operandType2 == OperandType::INT32) - { - int32_t intPadValue = 0; - if (!GetInputInt32(operation, 2, intPadValue, model, data)) - { - return Fail("%s: Could not read input 2 (INT32)", __func__); - } - descriptor.m_PadValue = intPadValue; - } - else - { - return Fail("%s: Operation has invalid inputs: type mismatch", __func__); - } - - bool isSupported = false; - FORWARD_LAYER_SUPPORT_FUNC(__func__, - IsPadSupported, - data.m_Backends, - isSupported, - inputInfo, - outputInfo, - descriptor); - if (!isSupported) - { - return false; - } - - IConnectableLayer* const layer = data.m_Network->AddPadLayer(descriptor); - assert(layer != nullptr); - input.Connect(layer->GetInputSlot(0)); - layer->GetOutputSlot(0).SetTensorInfo(outputInfo); - - return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); -} - -bool HalPolicy::ConvertPrelu(const Operation& operation, const Model& model, ConversionData& data) -{ - ALOGV("hal_1_2::HalPolicy::ConvertPrelu()"); - - LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); - LayerInputHandle alpha = ConvertToLayerInputHandle(operation, 1, model, data); - - if (!input.IsValid() || !alpha.IsValid()) - { - return Fail("%s: Operation has invalid inputs", __func__); - } - - const Operand* output = GetOutputOperand(operation, 0, model); - - if (!output) - { - return Fail("%s: Could not read output", __func__); - } - - const TensorInfo& inputInfo = input.GetTensorInfo(); - const TensorInfo& alphaInfo = alpha.GetTensorInfo(); - const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); - - if (IsDynamicTensor(outputInfo)) - { - return Fail("%s: Dynamic output tensors are not supported", __func__); - } - - bool isSupported = false; - FORWARD_LAYER_SUPPORT_FUNC(__func__, - IsPreluSupported, - data.m_Backends, - isSupported, - inputInfo, - alphaInfo, - outputInfo); - if (!isSupported) - { - return false; - } - - IConnectableLayer* const layer = data.m_Network->AddPreluLayer(); - - if (!layer) - { - return Fail("%s: AddPreluLayer failed", __func__); - } - - bool isReshapeSupported = BroadcastTensor(input, alpha, layer, data); - if (!isReshapeSupported) - { - return false; - } - - return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); -} - -bool HalPolicy::ConvertQuantize(const Operation& operation, const Model& model, ConversionData& data) -{ - ALOGV("hal_1_2::HalPolicy::ConvertQuantize()"); - - LayerInputHandle input = 
ConvertToLayerInputHandle(operation, 0, model, data); - if (!input.IsValid()) - { - return Fail("%s: Operation has invalid input", __func__); - } - - const Operand* const outputOperand = GetOutputOperand(operation, 0, model); - if (!outputOperand) - { - return Fail("%s: Operation has invalid outputs", __func__); - } - - const TensorInfo& outputInfo = GetTensorInfoForOperand(*outputOperand); - if (IsDynamicTensor(outputInfo)) - { - return Fail("%s: Dynamic output tensors are not supported", __func__); - } - - bool isSupported = false; - FORWARD_LAYER_SUPPORT_FUNC(__func__, - IsQuantizeSupported, - data.m_Backends, - isSupported, - input.GetTensorInfo(), - outputInfo); - if (!isSupported) - { - return false; - } - - IConnectableLayer* const layer = data.m_Network->AddQuantizeLayer(); - assert(layer != nullptr); - input.Connect(layer->GetInputSlot(0)); - - return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); -} - -bool HalPolicy::ConvertQuantizedLstm(const Operation& operation, const Model& model, ConversionData& data) -{ - ALOGV("hal_1_2::HalPolicy::ConvertQuantizedLstm()"); - - //Inputs: - // 0: The input: A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape [numBatches, inputSize] - // specifying the input to the LSTM cell. Tensor is quantized with a fixed quantization range of -1, 127/128. - LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); - if (!input.IsValid()) - { - return Fail("%s: Could not read input 0: input", __func__); - } - - //13: The previous cell state: A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT16_SYMM and shape - // [numBatches, outputSize] specifying the cell state from the previous time step of the LSTM cell. - // It is quantized using a quantization range of -2^4, 2^4 * 32767/32768. - LayerInputHandle previousCellStateIn = ConvertToLayerInputHandle(operation, 13, model, data); - if (!previousCellStateIn.IsValid()) - { - return Fail("%s: Could not read input 13: previousCellStateIn", __func__); - } - - // 14: The previous output state: A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape - // [numBathes, outputSize] specifying the output of the LSTM cell from previous time-step. Tensor - // is quantized with a fixed quantization range of -1, 127/128. - LayerInputHandle previousOutputIn = ConvertToLayerInputHandle(operation, 14, model, data); - if (!previousOutputIn.IsValid()) - { - return Fail("%s: Could not read input 14: previousOutputIn", __func__); - } - - // Get the input tensors: - // 1: The input-to-input weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape - // [outputSize, inputSize] specifying input-to-input part of weights for fully-connected layer inside the - // LSTM cell. Quantization zero point and scale must be the same across all the weights. - const ConstTensorPin inputToInputWeightsPin = - ConvertOperationInputToConstTensorPin(operation, 1, model, data); - - // 2: The input-to-forget weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape - // [outputSize, inputSize] specifying input-to-forget part of weights for fully-connected layer inside the - // LSTM cell. Quantization zero point and scale must be the same across all the weights. - const ConstTensorPin inputToForgetWeightsPin = - ConvertOperationInputToConstTensorPin(operation, 2, model, data); - - // 3: The input-to-cell weights. 
A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape - // [outputSize, inputSize] specifying input-to-cell part of weights for fully-connected layer inside the - // LSTM cell. Quantization zero point and scale must be the same across all the weights. - const ConstTensorPin inputToCellWeightsPin = - ConvertOperationInputToConstTensorPin(operation, 3, model, data); - - // 4: The input-to-output weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape - // [outputSize, inputSize] specifying input-to-output part of weights for fully-connected layer inside the - // LSTM cell. Quantization zero point and scale must be the same across all the weights. - const ConstTensorPin inputToOutputWeightsPin = - ConvertOperationInputToConstTensorPin(operation, 4, model, data); - - // 5: The recurrent-to-input weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape - // [outputSize, outputSize] specifying recurrent-to-input part of weights for fully-connected layer inside - // the LSTM cell. Quantization zero point and scale must be the same across all the weights. - const ConstTensorPin recurrentToInputWeightsPin = - ConvertOperationInputToConstTensorPin(operation, 5, model, data); - - // 6: The recurrent-to-forget weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape - // [outputSize, outputSize] specifying recurrent-to-forget part of weights for fully-connected layer inside - // the LSTM cell. Quantization zero point and scale must be the same across all the weights. - const ConstTensorPin recurrentToForgetWeightsPin = - ConvertOperationInputToConstTensorPin(operation, 6, model, data); - - // 7: The recurrent-to-cell weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape - // [outputSize, outputSize] specifying recurrent-to-cell part of weights for fully-connected layer inside - // the LSTM cell. Quantization zero point and scale must be the same across all the weights. - const ConstTensorPin recurrentToCellWeightsPin = - ConvertOperationInputToConstTensorPin(operation, 7, model, data); - - // 8: The recurrent-to-output weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape - // [outputSize, outputSize] specifying recurrent-to-output part of weights for fully-connected layer inside - // the LSTM cell. Quantization zero point and scale must be the same across all the weights. - const ConstTensorPin recurrentToOutputWeightsPin = - ConvertOperationInputToConstTensorPin(operation, 8, model, data); - - // 9: The input gate bias. A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32 and shape [outputSize] specifying the - // bias for the fully-connected layer inside the LSTM cell. Bias is quantized with scale being a product - // of input and weights scales and zeroPoint equal to 0. - const ConstTensorPin inputGateBiasPin = - ConvertOperationInputToConstTensorPin(operation, 9, model, data); - - // 10: The forget gate bias. A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32 and shape [outputSize] specifying - // the bias for the fully-connected layer inside the LSTM cell. Bias is quantized with scale being a product - // of input and weights scales and zeroPoint equal to 0. - const ConstTensorPin forgetGateBiasPin = - ConvertOperationInputToConstTensorPin(operation, 10, model, data); - - // 11:The cell bias. A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32 and shape [outputSize] specifying the bias - // for the fully-connected layer inside the LSTM cell. 
Bias is quantized with scale being a product of input - // and weights scales and zeroPoint equal to 0. - const ConstTensorPin cellBiasPin = - ConvertOperationInputToConstTensorPin(operation, 11, model, data); - - // 12:The output gate bias. A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32 and shape [outputSize] specifying - // the bias for the fully-connected layer inside the LSTM cell. Bias is quantized with scale being a product - // of input and weights scales and zeroPoint equal to 0. - const ConstTensorPin outputGateBiasPin = - ConvertOperationInputToConstTensorPin(operation, 12, model, data); - - if (!inputToInputWeightsPin.IsValid() || - !inputToForgetWeightsPin.IsValid() || - !inputToCellWeightsPin.IsValid() || - !inputToOutputWeightsPin.IsValid() || - !recurrentToInputWeightsPin.IsValid() || - !recurrentToForgetWeightsPin.IsValid() || - !recurrentToCellWeightsPin.IsValid() || - !recurrentToOutputWeightsPin.IsValid() || - !inputGateBiasPin.IsValid() || - !forgetGateBiasPin.IsValid() || - !cellBiasPin.IsValid() || - !outputGateBiasPin.IsValid()) - { - return Fail("%s: Operation has invalid tensor inputs", __func__); - } - - // Outputs: - // 0: The cell state: A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT16_SYMM and shape [numBatches, outputSize] - // which contains a cell state from the current time step. Tensor is quantized using a quantization range - // of -2^4, 2^4 * 32767/32768. - const Operand* cellStateOut = GetOutputOperand(operation, 0, model); - if (!cellStateOut) - { - return Fail("%s: Could not read output 0: cellStateOut", __func__); - } - - // 1: The output: A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape [numBathes, outputSize] which - // contains the output value. Tensor is quantized with a fixed quantization range of -1, 127/128. 
- const Operand* output = GetOutputOperand(operation, 1, model); - if (!output) - { - return Fail("%s: Could not read output 1: output", __func__); - } - - // Inputs - const TensorInfo& inputInfo = input.GetTensorInfo(); - const TensorInfo& previousCellStateInInfo = previousCellStateIn.GetTensorInfo(); - const TensorInfo& previousOutputInInfo = previousOutputIn.GetTensorInfo(); - - // Outputs - const TensorInfo& cellStateOutInfo = GetTensorInfoForOperand(*cellStateOut); - const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); - - // Dynamic tensors currently not supported - if (IsDynamicTensor(cellStateOutInfo) || IsDynamicTensor(outputInfo)) - { - return Fail("%s: Dynamic output tensors are not supported", __func__); - } - - QuantizedLstmInputParams params; - - params.m_InputToInputWeights = inputToInputWeightsPin.GetConstTensorPtr(); - params.m_InputToForgetWeights = inputToForgetWeightsPin.GetConstTensorPtr(); - params.m_InputToCellWeights = inputToCellWeightsPin.GetConstTensorPtr(); - params.m_InputToOutputWeights = inputToOutputWeightsPin.GetConstTensorPtr(); - params.m_RecurrentToInputWeights = recurrentToInputWeightsPin.GetConstTensorPtr(); - params.m_RecurrentToForgetWeights = recurrentToForgetWeightsPin.GetConstTensorPtr(); - params.m_RecurrentToCellWeights = recurrentToCellWeightsPin.GetConstTensorPtr(); - params.m_RecurrentToOutputWeights = recurrentToOutputWeightsPin.GetConstTensorPtr(); - params.m_InputGateBias = inputGateBiasPin.GetConstTensorPtr(); - params.m_ForgetGateBias = forgetGateBiasPin.GetConstTensorPtr(); - params.m_CellBias = cellBiasPin.GetConstTensorPtr(); - params.m_OutputGateBias = outputGateBiasPin.GetConstTensorPtr(); - - QuantizedLstmInputParamsInfo paramsInfo; - paramsInfo.m_InputToInputWeights = &(params.m_InputToInputWeights->GetInfo()); - paramsInfo.m_InputToForgetWeights = &(params.m_InputToForgetWeights->GetInfo()); - paramsInfo.m_InputToCellWeights = &(params.m_InputToCellWeights->GetInfo()); - paramsInfo.m_InputToOutputWeights = &(params.m_InputToOutputWeights->GetInfo()); - paramsInfo.m_RecurrentToInputWeights = &(params.m_RecurrentToInputWeights->GetInfo()); - paramsInfo.m_RecurrentToForgetWeights = &(params.m_RecurrentToForgetWeights->GetInfo()); - paramsInfo.m_RecurrentToCellWeights = &(params.m_RecurrentToCellWeights->GetInfo()); - paramsInfo.m_RecurrentToOutputWeights = &(params.m_RecurrentToOutputWeights->GetInfo()); - paramsInfo.m_InputGateBias = &(params.m_InputGateBias->GetInfo()); - paramsInfo.m_ForgetGateBias = &(params.m_ForgetGateBias->GetInfo()); - paramsInfo.m_CellBias = &(params.m_CellBias->GetInfo()); - paramsInfo.m_OutputGateBias = &(params.m_OutputGateBias->GetInfo()); +bool HalPolicy::ConvertPad(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_2::HalPolicy::ConvertPad()"); + return ::ConvertPad(operation, model, data); +} - bool isSupported = false; - FORWARD_LAYER_SUPPORT_FUNC(__func__, - IsQuantizedLstmSupported, - data.m_Backends, - isSupported, - inputInfo, - previousCellStateInInfo, - previousOutputInInfo, - cellStateOutInfo, - outputInfo, - paramsInfo); +bool HalPolicy::ConvertPadV2(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_2::HalPolicy::ConvertPadV2()"); + return ::ConvertPadV2(operation, model, data); +} - if (!isSupported) - { - return false; - } +bool HalPolicy::ConvertPrelu(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_2::HalPolicy::ConvertPrelu()"); + return ::ConvertPrelu(operation, 
model, data); +} - IConnectableLayer* const layer = data.m_Network->AddQuantizedLstmLayer(params, "QuantizedLstm"); - input.Connect(layer->GetInputSlot(0)); - previousCellStateIn.Connect(layer->GetInputSlot(1)); - previousOutputIn.Connect(layer->GetInputSlot(2)); +bool HalPolicy::ConvertQuantize(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_2::HalPolicy::ConvertQuantize()"); + return ::ConvertQuantize(operation, model, data); +} - return (SetupAndTrackLayerOutputSlot(operation, 0, *layer, 0, model, data) && - SetupAndTrackLayerOutputSlot(operation, 1, *layer, 1, model, data)); +bool HalPolicy::ConvertQuantizedLstm(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_2::HalPolicy::ConvertQuantizedLstm()"); + return ::ConvertQuantizedLstm(operation, model, data); } bool HalPolicy::ConvertReLu(const Operation& operation, const Model& model, ConversionData& data) @@ -1857,134 +374,7 @@ bool HalPolicy::ConvertResize(const Operation& operation, ResizeMethod resizeMethod) { ALOGV("hal_1_2::HalPolicy::ConvertResize()"); - ALOGV("resizeMethod = %s", GetResizeMethodAsCString(resizeMethod)); - - LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); - if (!input.IsValid()) - { - return Fail("%s: Could not read input 0", __func__); - } - - const Operand* output = GetOutputOperand(operation, 0, model); - if (!output) - { - return Fail("%s: Could not read output 0", __func__); - } - - const TensorInfo& inputInfo = input.GetTensorInfo(); - const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); - - if (IsDynamicTensor(outputInfo)) - { - return Fail("%s: Dynamic output tensors are not supported", __func__); - } - - ResizeDescriptor descriptor; - descriptor.m_Method = resizeMethod; - descriptor.m_DataLayout = OptionalDataLayout(operation, 3, model, data); - - OperandType operandType1; - OperandType operandType2; - - if (!GetOperandType(operation, 1, model, operandType1) || - !GetOperandType(operation, 2, model, operandType2)) - { - return Fail("%s: Operation has invalid inputs", __func__); - } - - if (operandType1 != operandType2) - { - return Fail("%s: Operation has invalid inputs. Type of input 1 and 2 should be the same", __func__); - } - - if (operandType1 == OperandType::INT32) - { - // Case 1: resizing by shape - int32_t targetWidth = 0; - int32_t targetHeight = 0; - - if (!GetInputInt32(operation, 1, targetWidth, model, data) || - !GetInputInt32(operation, 2, targetHeight, model, data)) - { - return Fail("%s: Operation has invalid inputs for resizing by shape", __func__); - } - - if (targetWidth < 0 || targetHeight < 0) - { - return Fail("%s: Operation has invalid inputs for resizing by shape. 
" - "Target width/height cannot be < 0", __func__); - } - - descriptor.m_TargetWidth = static_cast(targetWidth); - descriptor.m_TargetHeight = static_cast(targetHeight); - } - else if (operandType1 == OperandType::FLOAT32) - { - // Case 2: resizing by scale - float widthScale = 1.0f; - float heightScale = 1.0f; - - if (!GetInputFloat32(operation, 1, widthScale, model, data) || - !GetInputFloat32(operation, 2, heightScale, model, data)) - { - return Fail("%s: Operation has invalid inputs for resizing by scale", __func__); - } - - const TensorShape& inputShape = inputInfo.GetShape(); - armnnUtils::DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout); - - float width = inputShape[dataLayoutIndexed.GetWidthIndex()]; - float height = inputShape[dataLayoutIndexed.GetHeightIndex()]; - - descriptor.m_TargetWidth = std::floor(width * widthScale); - descriptor.m_TargetHeight = std::floor(height * heightScale); - } - else if (operandType1 == OperandType::FLOAT16) - { - Half widthScale; - Half heightScale; - - if (!GetInputScalar(operation, 1, HalPolicy::OperandType::FLOAT16, widthScale, model, data) || - !GetInputScalar(operation, 2, HalPolicy::OperandType::FLOAT16, heightScale, model, data)) - { - return Fail("%s: Operation has invalid inputs for resizing by scale", __func__); - } - - const TensorShape& inputShape = inputInfo.GetShape(); - armnnUtils::DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout); - - Half width = static_cast(inputShape[dataLayoutIndexed.GetWidthIndex()]); - Half height = static_cast(inputShape[dataLayoutIndexed.GetHeightIndex()]); - - descriptor.m_TargetWidth = std::floor(width * widthScale); - descriptor.m_TargetHeight = std::floor(height * heightScale); - } - else - { - return Fail("%s: Operand has invalid data type for resizing by scale", __func__); - } - - bool isSupported = false; - FORWARD_LAYER_SUPPORT_FUNC(__func__, - IsResizeSupported, - data.m_Backends, - isSupported, - inputInfo, - outputInfo, - descriptor); - - if (!isSupported) - { - return false; - } - - IConnectableLayer* layer = data.m_Network->AddResizeLayer(descriptor); - - assert(layer != nullptr); - - input.Connect(layer->GetInputSlot(0)); - - return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); + return ::ConvertResize(operation, model, data, resizeMethod); } bool HalPolicy::ConvertSpaceToBatchNd(const Operation& operation, const Model& model, ConversionData& data) @@ -1996,126 +386,13 @@ bool HalPolicy::ConvertSpaceToBatchNd(const Operation& operation, const Model& m bool HalPolicy::ConvertSpaceToDepth(const Operation& operation, const Model& model, ConversionData& data) { ALOGV("hal_1_2::HalPolicy::ConvertSpaceToDepth()"); - - LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); - if (!input.IsValid() ) - { - return Fail("%s: Operation has invalid inputs", __func__); - } - - const TensorInfo& inputInfo = input.GetTensorInfo(); - unsigned int rank = inputInfo.GetNumDimensions(); - if (rank != 4) - { - return Fail("%s: Only inputs with rank 4 are supported", __func__); - } - - const Operand* output = GetOutputOperand(operation, 0, model); - if (!output) - { - return Fail("%s: Could not read output 0", __func__); - } - - const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); - if (IsDynamicTensor(outputInfo)) - { - return Fail("%s: Dynamic output tensors are not supported", __func__); - } - - SpaceToDepthDescriptor desc; - - GetInputScalar(operation, 1, OperandType::INT32, desc.m_BlockSize, model, data); - - if (desc.m_BlockSize <= 1) 
- { - return Fail("%s: Block size must be at least 1 in all dimensions"); - } - - desc.m_DataLayout = OptionalDataLayout(operation, 2, model, data); - - bool isSupported = false; - FORWARD_LAYER_SUPPORT_FUNC(__func__, - IsSpaceToDepthSupported, - data.m_Backends, - isSupported, - inputInfo, - outputInfo, - desc); - if (!isSupported) - { - return false; - } - - IConnectableLayer* const layer = data.m_Network->AddSpaceToDepthLayer(desc); - assert(layer != nullptr); - input.Connect(layer->GetInputSlot(0)); - - return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); + return ::ConvertSpaceToDepth(operation, model, data); } bool HalPolicy::ConvertSoftmax(const Operation& operation, const Model& model, ConversionData& data) { ALOGV("hal_1_2::HalPolicy::ConvertSoftmax()"); - - LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); - if (!input.IsValid()) - { - return Fail("%s: Operation has invalid inputs", __func__); - } - - const Operand* outputOperand = GetOutputOperand(operation, 0, model); - if (!outputOperand) - { - return Fail("%s: Operation has no outputs", __func__); - } - - const TensorInfo& outputInfo = GetTensorInfoForOperand(*outputOperand); - if (IsDynamicTensor(outputInfo)) - { - return Fail("%s: Dynamic output tensors are not supported", __func__); - } - - SoftmaxDescriptor desc; - if (!GetInputFloat32(operation, 1, desc.m_Beta, model, data)) - { - return Fail("%s: Operation has invalid inputs", __func__); - } - - if (operation.inputs.size() > 2 && !GetInputScalar(operation, - 2, - HalPolicy::OperandType::INT32, - desc.m_Axis, - model, - data)) - { - return Fail("%s: Operation has invalid inputs", __func__); - } - - if (input.GetTensorInfo().GetNumDimensions() > 2 || - !(desc.m_Axis == 1 || - (desc.m_Axis < 0 && static_cast(input.GetTensorInfo().GetNumDimensions()) + desc.m_Axis == 1))) - { - return Fail("%s: Unsupported input greater than 2D or axis != 1", __func__); - } - - bool isSupported = false; - FORWARD_LAYER_SUPPORT_FUNC(__func__, - IsSoftmaxSupported, - data.m_Backends, - isSupported, - input.GetTensorInfo(), - outputInfo, - desc); - if (!isSupported) - { - return false; - } - - IConnectableLayer* layer = data.m_Network->AddSoftmaxLayer(desc); - assert(layer != nullptr); - input.Connect(layer->GetInputSlot(0)); - - return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); + return ::ConvertSoftmax(operation, model, data); } bool HalPolicy::ConvertSub(const Operation& operation, const Model& model, ConversionData& data) @@ -2130,450 +407,10 @@ bool HalPolicy::ConvertTanH(const Operation& operation, const Model& model, Conv return ::ConvertTanH(operation, model, data); } -template -bool SetupAndTrackLayerOutputSlotAndOverrideTensorInfo(const HalOperation& operation, - uint32_t operationOutputIndex, - armnn::IConnectableLayer& layer, - uint32_t layerOutputIndex, - const HalModel& model, - ConversionData& data, - const armnn::TensorInfo tensor_info) -{ - using HalOperand = typename HalPolicy::Operand; - - const HalOperand* outputOperand = GetOutputOperand(operation, operationOutputIndex, model); - if ((outputOperand == nullptr) || (operationOutputIndex >= layer.GetNumOutputSlots())) - { - return false; - } - - armnn::IOutputSlot& outputSlot = layer.GetOutputSlot(layerOutputIndex); - - const uint32_t operandIndex = operation.outputs[operationOutputIndex]; - data.m_OutputSlotForOperand[operandIndex] = &outputSlot; - - outputSlot.SetTensorInfo(tensor_info); - - return true; -} - - bool HalPolicy::ConvertLstm(const Operation& 
operation, const Model& model, ConversionData& data) { ALOGV("hal_1_2::HalPolicy::ConvertLstm()"); - - // Inputs: - // 00: The input: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, input_size], where - // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input. - LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); - if (!input.IsValid()) - { - return Fail("%s: Could not read input 0: input", __func__); - } - // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. - LayerInputHandle outputStateIn = ConvertToLayerInputHandle(operation, 18, model, data); - if (!outputStateIn.IsValid()) - { - return Fail("%s: Could not read input 18: outputStateIn", __func__); - } - // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. - LayerInputHandle cellStateIn = ConvertToLayerInputHandle(operation, 19, model, data); - if (!cellStateIn.IsValid()) - { - return Fail("%s: Could not read input 19: cellStateIn", __func__); - } - - // Get the mandatory input tensors: - // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape - // [num_units, input_size]. - const ConstTensorPin inputToForgetWeightsPin = - (DequantizeAndMakeConstTensorPin(operation, model, data, 2)); - // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape - // [num_units, input_size]. - const ConstTensorPin inputToCellWeightsPin = - (DequantizeAndMakeConstTensorPin(operation, model, data, 3)); - // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape - // [num_units, input_size]. - const ConstTensorPin inputToOutputWeightsPin = - (DequantizeAndMakeConstTensorPin(operation, model, data, 4)); - // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape - // [num_units, output_size]. - const ConstTensorPin recurrentToForgetWeightsPin = - (DequantizeAndMakeConstTensorPin(operation, model, data, 6)); - // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape - // [num_units, output_size]. - const ConstTensorPin recurrentToCellWeightsPin = - (DequantizeAndMakeConstTensorPin(operation, model, data, 7)); - // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape - // [num_units, output_size]. - const ConstTensorPin recurrentToOutputWeightsPin = - (DequantizeAndMakeConstTensorPin(operation, model, data, 8)); - // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. - const ConstTensorPin forgetGateBiasPin = - ConvertOperationInputToConstTensorPin(operation, 13, model, data); - // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. - const ConstTensorPin cellBiasPin = - ConvertOperationInputToConstTensorPin(operation, 14, model, data); - // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. 
- const ConstTensorPin outputGateBiasPin = - ConvertOperationInputToConstTensorPin(operation, 15, model, data); - - if (!inputToForgetWeightsPin.IsValid() || - !inputToCellWeightsPin.IsValid() || - !inputToOutputWeightsPin.IsValid() || - !recurrentToForgetWeightsPin.IsValid() || - !recurrentToCellWeightsPin.IsValid() || - !recurrentToOutputWeightsPin.IsValid() || - !forgetGateBiasPin.IsValid() || - !cellBiasPin.IsValid() || - !outputGateBiasPin.IsValid()) - { - return Fail("%s: Operation has invalid tensor inputs", __func__); - } - - // Get the optional input tensors: - // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape - // [num_units, input_size], where “num_units” corresponds to the number of cell units. - const ConstTensorPin inputToInputWeightsPin = - (DequantizeAndMakeConstTensorPin(operation, model, data, 1, true)); - // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape - // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e., - // “num_units”), or the second dimension of the “projection_weights”, if defined. - const ConstTensorPin recurrentToInputWeightsPin = - (DequantizeAndMakeConstTensorPin(operation, model, data, 5, true)); - // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. - const ConstTensorPin cellToInputWeightsPin = - (DequantizeAndMakeConstTensorPin(operation, model, data, 9, true)); - // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. - const ConstTensorPin cellToForgetWeightsPin = - (DequantizeAndMakeConstTensorPin(operation, model, data, 10, true)); - // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. - const ConstTensorPin cellToOutputWeightsPin = - (DequantizeAndMakeConstTensorPin(operation, model, data, 11, true)); - // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. - const ConstTensorPin inputGateBiasPin = - ConvertOperationInputToConstTensorPin(operation, - 12, - model, - data, - g_DontPermute, - nullptr, - true); - - // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape - // [output_size, num_units]. - const ConstTensorPin projectionWeightsPin = - (DequantizeAndMakeConstTensorPin(operation, model, data, 16, true)); - // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size]. 
- const ConstTensorPin projectionBiasPin = - ConvertOperationInputToConstTensorPin(operation, - 17, - model, - data, - g_DontPermute, - nullptr, - true); - - if ((!inputToInputWeightsPin.IsValid() && !inputToInputWeightsPin.IsOptional()) || - (!recurrentToInputWeightsPin.IsValid() && !recurrentToInputWeightsPin.IsOptional()) || - (!cellToInputWeightsPin.IsValid() && !cellToInputWeightsPin.IsOptional()) || - (!cellToForgetWeightsPin.IsValid() && !cellToForgetWeightsPin.IsOptional()) || - (!cellToOutputWeightsPin.IsValid() && !cellToOutputWeightsPin.IsOptional()) || - (!inputGateBiasPin.IsValid() && !inputGateBiasPin.IsOptional()) || - (!projectionWeightsPin.IsValid() && !projectionWeightsPin.IsOptional()) || - (!projectionBiasPin.IsValid() && !projectionBiasPin.IsOptional())) - { - return Fail("%s: Operation has invalid tensor inputs", __func__); - } - - // Get the mandatory input scalars (actually 1-D tensors of size 1): - // 20: The activation function: A value indicating the activation function: - // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid. - // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip]. - // If set to 0.0 then clipping is disabled. - // 22: The clipping threshold: for the output from the projection layer, such that values are bound within - // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. - ActivationFn activation; - float cellClip; - float projClip; - if (!GetInputActivationFunctionFromTensor(operation, 20, activation, model, data) || - !GetInputScalar(operation, 21, OperandType::FLOAT32, cellClip, model, data) || - !GetInputScalar(operation, 22, OperandType::FLOAT32, projClip, model, data)) - { - return Fail("%s: Operation has invalid scalar inputs", __func__); - } - - // Get the normalization tensors - // 23: The input layer normalization weights. A 1-D tensor of shape [num_units]. - // Used to rescale normalized inputs to activation at input gate. - const ConstTensorPin inputLayerNormWeightsPin - (DequantizeAndMakeConstTensorPin(operation, model, data, 23, true)); - - // 24: The forget layer normalization weights. A 1-D tensor of shape [num_units]. - // Used to rescale normalized inputs to activation at forget gate. - const ConstTensorPin forgetLayerNormWeightsPin = - ConvertOperationInputToConstTensorPin(operation, - 24, - model, - data, - g_DontPermute, - nullptr, - true); - - // 25: The cell layer normalization weights. A 1-D tensor of shape [num_units]. - // Used to rescale normalized inputs to activation at cell gate. - const ConstTensorPin cellLayerNormWeightsPin = - ConvertOperationInputToConstTensorPin(operation, - 25, - model, - data, - g_DontPermute, - nullptr, - true); - - // 26: The output layer normalization weights. A 1-D tensor of shape [num_units]. - // Used to rescale normalized inputs to activation at output gate. - const ConstTensorPin outputLayerNormWeightsPin = - ConvertOperationInputToConstTensorPin(operation, - 26, - model, - data, - g_DontPermute, - nullptr, - true); - - // Outputs: - // 00: The scratch buffer: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units * 4] - // with CIFG, or [batch_size, num_units * 3] without CIFG. - const Operand* scratchBuffer = GetOutputOperand(operation, 0, model); - if (!scratchBuffer) - { - return Fail("%s: Could not read output 0: scratchBuffer", __func__); - } - // 01: The output state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. 
- const Operand* outputStateOut = GetOutputOperand(operation, 1, model); - if (!outputStateOut) - { - return Fail("%s: Could not read output 1: outputStateOut", __func__); - } - // 02: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. - const Operand* cellStateOut = GetOutputOperand(operation, 2, model); - if (!cellStateOut) - { - return Fail("%s: Could not read output 2: cellStateOut", __func__); - } - // 03: The output: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. This is - // effectively the same as the current “output state (out)” value. - const Operand* output = GetOutputOperand(operation, 3, model); - if (!output) - { - return Fail("%s: Could not read output 3: output", __func__); - } - - // set the params structure for the AddLstmLayer call - LstmInputParams params; - params.m_InputToInputWeights = inputToInputWeightsPin.GetConstTensorPtr(); - params.m_InputToForgetWeights = inputToForgetWeightsPin.GetConstTensorPtr(); - params.m_InputToCellWeights = inputToCellWeightsPin.GetConstTensorPtr(); - params.m_InputToOutputWeights = inputToOutputWeightsPin.GetConstTensorPtr(); - params.m_RecurrentToInputWeights = recurrentToInputWeightsPin.GetConstTensorPtr(); - params.m_RecurrentToForgetWeights = recurrentToForgetWeightsPin.GetConstTensorPtr(); - params.m_RecurrentToCellWeights = recurrentToCellWeightsPin.GetConstTensorPtr(); - params.m_RecurrentToOutputWeights = recurrentToOutputWeightsPin.GetConstTensorPtr(); - params.m_CellToInputWeights = cellToInputWeightsPin.GetConstTensorPtr(); - params.m_CellToForgetWeights = cellToForgetWeightsPin.GetConstTensorPtr(); - params.m_CellToOutputWeights = cellToOutputWeightsPin.GetConstTensorPtr(); - params.m_InputGateBias = inputGateBiasPin.GetConstTensorPtr(); - params.m_ForgetGateBias = forgetGateBiasPin.GetConstTensorPtr(); - params.m_CellBias = cellBiasPin.GetConstTensorPtr(); - params.m_OutputGateBias = outputGateBiasPin.GetConstTensorPtr(); - params.m_ProjectionWeights = projectionWeightsPin.GetConstTensorPtr(); - params.m_ProjectionBias = projectionBiasPin.GetConstTensorPtr(); - params.m_InputLayerNormWeights = inputLayerNormWeightsPin.GetConstTensorPtr(); - params.m_ForgetLayerNormWeights = forgetLayerNormWeightsPin.GetConstTensorPtr(); - params.m_CellLayerNormWeights = cellLayerNormWeightsPin.GetConstTensorPtr(); - params.m_OutputLayerNormWeights = outputLayerNormWeightsPin.GetConstTensorPtr(); - - // set the layer descriptor - LstmDescriptor desc; - desc.m_ActivationFunc = activation; - desc.m_ClippingThresCell = cellClip; - desc.m_ClippingThresProj = projClip; - desc.m_CifgEnabled = (params.m_InputToInputWeights == nullptr || - params.m_RecurrentToInputWeights == nullptr || - params.m_InputGateBias == nullptr); - desc.m_PeepholeEnabled = (params.m_CellToForgetWeights != nullptr || - params.m_CellToOutputWeights != nullptr); - desc.m_ProjectionEnabled = (params.m_ProjectionWeights != nullptr); - desc.m_LayerNormEnabled = (params.m_InputLayerNormWeights != nullptr || - params.m_ForgetLayerNormWeights != nullptr || - params.m_CellLayerNormWeights != nullptr || - params.m_OutputLayerNormWeights != nullptr); - - // validate the optional input groups - if (desc.m_CifgEnabled && - (params.m_InputToInputWeights != nullptr || - params.m_RecurrentToInputWeights != nullptr || - params.m_InputGateBias != nullptr)) - { - return Fail("%s: All, or none, of input-to-input weights, recurrent-to-input weights," - " and input gate bias must be provided", __func__); - 
} - - if (!desc.m_ProjectionEnabled && params.m_ProjectionBias != nullptr) - { - return Fail("%s: projection bias should not be provided without projection weights", __func__); - } - - if (desc.m_PeepholeEnabled && - (params.m_CellToForgetWeights == nullptr || - params.m_CellToOutputWeights == nullptr || - (!desc.m_CifgEnabled && params.m_CellToInputWeights == nullptr))) - { - return Fail("%s: All, or none, of cell-to-forget weights and cell-to-output weights must be provided" - " and, if CIFG is not enabled, cell-to-input weights must also be provided", __func__); - } - - if (desc.m_LayerNormEnabled && - (params.m_ForgetLayerNormWeights == nullptr || - params.m_CellLayerNormWeights == nullptr || - params.m_OutputLayerNormWeights == nullptr || - (!desc.m_CifgEnabled && params.m_InputLayerNormWeights == nullptr))) - { - return Fail("%s: All, or none, of forget-norm weights, cell-norm weights and output-norm weights must be" - " provided and, if CIFG is not enabled, input-norm weights must also be provided", __func__); - } - - // Check if the layer is supported - // Inputs - const TensorInfo& inputInfo = input.GetTensorInfo(); - const TensorInfo& outputStateInInfo = outputStateIn.GetTensorInfo(); - const TensorInfo& cellStateInInfo = cellStateIn.GetTensorInfo(); - - // Outputs - const TensorInfo& scratchBufferInfo = GetTensorInfoForOperand(*scratchBuffer); - const TensorInfo& outputStateOutInfo = GetTensorInfoForOperand(*outputStateOut); - const TensorInfo& cellStateOutInfo = GetTensorInfoForOperand(*cellStateOut); - const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); - - // Check if the scratch buffer shape was initialized, - // In some cases the shape could be (0,0) which requires the driver - // to infer the shape and set it up accordingly. - // The code below does that. 
- TensorInfo fixSbInfo = scratchBufferInfo; - if (IsDynamicTensor(scratchBufferInfo)) - { - auto & s = fixSbInfo.GetShape(); - s[0] = outputStateInInfo.GetShape()[0]; - if (desc.m_CifgEnabled) - { - // 2D tensor with dimensions [num_units * 3, batch_size] with CIFG - s[1] = cellStateOutInfo.GetShape()[1]*3; - } - else - { - // scratch_buffer [num_units * 4, batch_size] without CIFG - s[1] = cellStateOutInfo.GetShape()[1]*4; - } - } - - if (IsDynamicTensor(outputStateOutInfo) || - IsDynamicTensor(cellStateOutInfo) || - IsDynamicTensor(outputInfo)) - { - return Fail("%s: Dynamic output tensors are not supported %d %d %d %d", __func__, - IsDynamicTensor(scratchBufferInfo), IsDynamicTensor(outputStateOutInfo), - IsDynamicTensor(cellStateOutInfo), IsDynamicTensor(outputInfo)); - } - - // Basic parameters - LstmInputParamsInfo paramsInfo; - paramsInfo.m_InputToForgetWeights = &(params.m_InputToForgetWeights->GetInfo()); - paramsInfo.m_InputToCellWeights = &(params.m_InputToCellWeights->GetInfo()); - paramsInfo.m_InputToOutputWeights = &(params.m_InputToOutputWeights->GetInfo()); - paramsInfo.m_RecurrentToForgetWeights = &(params.m_RecurrentToForgetWeights->GetInfo()); - paramsInfo.m_RecurrentToCellWeights = &(params.m_RecurrentToCellWeights->GetInfo()); - paramsInfo.m_RecurrentToOutputWeights = &(params.m_RecurrentToOutputWeights->GetInfo()); - paramsInfo.m_ForgetGateBias = &(params.m_ForgetGateBias->GetInfo()); - paramsInfo.m_CellBias = &(params.m_CellBias->GetInfo()); - paramsInfo.m_OutputGateBias = &(params.m_OutputGateBias->GetInfo()); - - // Optional parameters - if(!desc.m_CifgEnabled) - { - paramsInfo.m_InputToInputWeights = &(params.m_InputToInputWeights->GetInfo()); - paramsInfo.m_RecurrentToInputWeights = &(params.m_RecurrentToInputWeights->GetInfo()); - if (params.m_CellToInputWeights != nullptr) - { - paramsInfo.m_CellToInputWeights = &(params.m_CellToInputWeights->GetInfo()); - } - paramsInfo.m_InputGateBias = &(params.m_InputGateBias->GetInfo()); - } - - if(desc.m_ProjectionEnabled) - { - paramsInfo.m_ProjectionWeights = &(params.m_ProjectionWeights->GetInfo()); - if (params.m_ProjectionBias != nullptr) - { - paramsInfo.m_ProjectionBias = &(params.m_ProjectionBias->GetInfo()); - } - } - - if(desc.m_PeepholeEnabled) - { - paramsInfo.m_CellToForgetWeights = &(params.m_CellToForgetWeights->GetInfo()); - paramsInfo.m_CellToOutputWeights = &(params.m_CellToOutputWeights->GetInfo()); - } - - if (desc.m_LayerNormEnabled) - { - if(!desc.m_CifgEnabled) - { - paramsInfo.m_InputLayerNormWeights = &(params.m_InputLayerNormWeights->GetInfo()); - } - paramsInfo.m_ForgetLayerNormWeights = &(params.m_ForgetLayerNormWeights->GetInfo()); - paramsInfo.m_CellLayerNormWeights = &(params.m_CellLayerNormWeights->GetInfo()); - paramsInfo.m_OutputLayerNormWeights = &(params.m_OutputLayerNormWeights->GetInfo()); - } - - bool isSupported = false; - FORWARD_LAYER_SUPPORT_FUNC(__func__, - IsLstmSupported, - data.m_Backends, - isSupported, - inputInfo, - outputStateInInfo, - cellStateInInfo, - fixSbInfo, - outputStateOutInfo, - cellStateOutInfo, - outputInfo, - desc, - paramsInfo); - if (!isSupported) - { - return false; - } - - // Add the layer - IConnectableLayer* layer = data.m_Network->AddLstmLayer(desc, params, "Lstm"); - - input.Connect(layer->GetInputSlot(0)); - outputStateIn.Connect(layer->GetInputSlot(1)); - cellStateIn.Connect(layer->GetInputSlot(2)); - - - return ( - (IsDynamicTensor(scratchBufferInfo)? 
- SetupAndTrackLayerOutputSlotAndOverrideTensorInfo( - operation, 0, *layer, 0, model, data,fixSbInfo): - SetupAndTrackLayerOutputSlot( - operation, 0, *layer, 0, model, data)) && - SetupAndTrackLayerOutputSlot(operation, 1, *layer, 1, model, data) && - SetupAndTrackLayerOutputSlot(operation, 2, *layer, 2, model, data) && - SetupAndTrackLayerOutputSlot(operation, 3, *layer, 3, model, data)); + return ::ConvertLstm(operation, model, data); } bool HalPolicy::ConvertSqrt(const Operation& operation, const Model& model, ConversionData& data) @@ -2605,175 +442,8 @@ bool HalPolicy::ConvertTranspose(const Operation& operation, const Model& model, bool HalPolicy::ConvertTransposeConv2d(const Operation& operation, const Model& model, ConversionData& data) { - LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); - - if (!input.IsValid()) - { - return Fail("%s: Operation has invalid inputs", __func__); - } - - const Operand* output = GetOutputOperand(operation, 0, model); - - if (!output) - { - return Fail("%s: Could not read output 0", __func__); - } - - const TensorInfo& inputInfo = input.GetTensorInfo(); - const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); - if (IsDynamicTensor(outputInfo)) - { - return Fail("%s: Dynamic output tensors are not supported", __func__); - } - - // ArmNN does not currently support non-fixed weights or bias - // Find the shape of the weights tensor. In AndroidNN this will be [ 1, H, W, I * M ] - const Operand* weightsOperand = GetInputOperand(operation, 1, model); - - if (weightsOperand == nullptr) - { - return Fail("%s: Operand is invalid", __func__); - } - TransposeConvolution2dDescriptor desc; - desc.m_DataLayout = DataLayout::NHWC; - - // Determine whether padding is implicit or explicit - bool implicitPadding = operation.inputs.size() == 9; - - if (implicitPadding ) - { - desc.m_DataLayout = OptionalDataLayout(operation, 8, model, data); - } - else - { - desc.m_DataLayout = OptionalDataLayout(operation, 10, model, data); - } - - armnnUtils::DataLayoutIndexed dataLayoutIndexed(desc.m_DataLayout); - unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex(); - unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex(); - - const PermutationVector OHWIToOIHW = {0, 2, 3, 1}; - - // The shape of the weight is [depth_out, filter_height, filter_width, depth_in]. - // We have to permute it to OIHW if the data layout is NCHW. - const ConstTensorPin weightsPin = (desc.m_DataLayout == DataLayout::NCHW) ? 
- ConvertOperationInputToConstTensorPin(operation, 1, model, data, OHWIToOIHW) : - ConvertOperationInputToConstTensorPin(operation, 1, model, data); - - // Bias is a 1D tensor - const ConstTensorPin biasPin = - ConvertOperationInputToConstTensorPin(operation, 2, model, data); - - if (!weightsPin.IsValid()) - { - return Fail("%s: Operation has invalid weights", __func__); - } - - if (!biasPin.IsValid()) - { - return Fail("%s: Operation has invalid biases", __func__); - } - - ConstTensor weights = weightsPin.GetConstTensor(); - ConstTensor bias = biasPin.GetConstTensor(); - SanitizeBiasQuantizationScale(bias.GetInfo(), weights.GetInfo(), inputInfo); - - ActivationFn activation; - - if (implicitPadding) - { - int32_t strideX{0}; - int32_t strideY{0}; - int32_t padLeft{0}; - int32_t padRight{0}; - int32_t padTop{0}; - int32_t padBottom{0}; - - android::nn::PaddingScheme paddingScheme; - if (!GetInputPaddingScheme(operation, 4, paddingScheme, model, data) || - !GetInputScalar(operation, 5, OperandType::INT32, strideX, model, data) || - !GetInputScalar(operation, 6, OperandType::INT32, strideY, model, data) || - !GetInputActivationFunction(operation, 7, activation, model, data)) - { - return Fail("%s: Operation has invalid inputs (implicit padding)", __func__); - } - - const uint32_t kernelX = weights.GetShape()[widthIndex]; - const uint32_t kernelY = weights.GetShape()[heightIndex]; - const uint32_t outputX = outputInfo.GetShape()[widthIndex]; - const uint32_t outputY = outputInfo.GetShape()[heightIndex]; - - CalcPaddingTransposeConv(outputX, kernelX, strideX, padLeft, padRight, paddingScheme); - CalcPaddingTransposeConv(outputY, kernelY, strideY, padTop, padBottom, paddingScheme); - - // NOTE: The Android NN API allows for negative padding values in TransposeConv2d, - // but Arm NN only supports values >= 0 - if (padLeft < 0 || padRight < 0 || padTop < 0 || padBottom < 0) - { - return Fail("%s: Negative padding values are not supported", __func__); - } - - desc.m_StrideX = boost::numeric_cast(strideX); - desc.m_StrideY = boost::numeric_cast(strideY); - desc.m_PadLeft = boost::numeric_cast(padLeft); - desc.m_PadRight = boost::numeric_cast(padRight); - desc.m_PadTop = boost::numeric_cast(padTop); - desc.m_PadBottom = boost::numeric_cast(padBottom); - } - else if (operation.inputs.size() == 11) - { - // explicit padding - if (!GetInputScalar(operation, 3, OperandType::INT32, desc.m_PadLeft, model, data) || - !GetInputScalar(operation, 4, OperandType::INT32, desc.m_PadRight, model, data) || - !GetInputScalar(operation, 5, OperandType::INT32, desc.m_PadTop, model, data) || - !GetInputScalar(operation, 6, OperandType::INT32, desc.m_PadBottom, model, data) || - !GetInputScalar(operation, 7, OperandType::INT32, desc.m_StrideX, model, data) || - !GetInputScalar(operation, 8, OperandType::INT32, desc.m_StrideY, model, data) || - !GetInputActivationFunction(operation, 9, activation, model, data)) - { - return Fail("%s: Operation has invalid inputs (explicit padding)", __func__); - } - } - else - { - return Fail("%s: Unsupported number of operation inputs", __func__); - } - - desc.m_BiasEnabled = true; - Optional biases(bias.GetInfo()); - - bool isSupported = false; - FORWARD_LAYER_SUPPORT_FUNC(__func__, - IsTransposeConvolution2dSupported, - data.m_Backends, - isSupported, - inputInfo, - outputInfo, - desc, - weights.GetInfo(), - biases); - if (!isSupported) - { - return false; - } - - IConnectableLayer* startLayer = - data.m_Network->AddTransposeConvolution2dLayer(desc, weights, Optional(bias)); - if 
(!startLayer) - { - return Fail("%s: AddTransposeConvolution2dLayer failed", __func__); - } - - IConnectableLayer* endLayer = ProcessActivation(outputInfo, activation, startLayer, data); - if (!endLayer) - { - return Fail("%s: ProcessActivation failed", __func__); - } - - input.Connect(startLayer->GetInputSlot(0)); - - return SetupAndTrackLayerOutputSlot(operation, 0, *endLayer, model, data); + ALOGV("hal_1_2::HalPolicy::ConvertTransposeConv2d()"); + return ::ConvertTransposeConv2d(operation, model, data); } } // namespace hal_1_2 diff --git a/1.2/HalPolicy.hpp b/1.2/HalPolicy.hpp index cd4f2da4..b127a638 100644 --- a/1.2/HalPolicy.hpp +++ b/1.2/HalPolicy.hpp @@ -6,6 +6,7 @@ #pragma once #include "../ConversionUtils.hpp" +#include "../ConversionUtils_1_2.hpp" #include @@ -29,6 +30,7 @@ public: using OperationType = V1_2::OperationType; using ExecutionCallback = V1_2::IExecutionCallback; using getSupportedOperations_cb = V1_2::IDevice::getSupportedOperations_1_2_cb; + using ErrorStatus = V1_0::ErrorStatus; static bool ConvertOperation(const Operation& operation, const Model& model, ConversionData& data); diff --git a/1.3/ArmnnDriver.hpp b/1.3/ArmnnDriver.hpp new file mode 100644 index 00000000..be355932 --- /dev/null +++ b/1.3/ArmnnDriver.hpp @@ -0,0 +1,294 @@ +// +// Copyright © 2020 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include "../ArmnnDevice.hpp" +#include "ArmnnDriverImpl.hpp" +#include "HalPolicy.hpp" + +#include "../ArmnnDriverImpl.hpp" +#include "../1.3/ArmnnDriverImpl.hpp" +#include "../1.3/HalPolicy.hpp" +#include "../1.2/ArmnnDriverImpl.hpp" +#include "../1.2/HalPolicy.hpp" +#include "../1.1/ArmnnDriverImpl.hpp" +#include "../1.1/HalPolicy.hpp" +#include "../1.0/ArmnnDriverImpl.hpp" +#include "../1.0/HalPolicy.hpp" + +#include + +namespace armnn_driver +{ +namespace hal_1_3 +{ + +class ArmnnDriver : public ArmnnDevice, public V1_3::IDevice +{ +public: + + ArmnnDriver(DriverOptions options) + : ArmnnDevice(std::move(options)) + { + ALOGV("hal_1_3::ArmnnDriver::ArmnnDriver()"); + } + ~ArmnnDriver() {} + + using HidlToken = android::hardware::hidl_array; + +public: + Return getCapabilities(V1_0::IDevice::getCapabilities_cb cb) override + { + ALOGV("hal_1_3::ArmnnDriver::getCapabilities()"); + + return hal_1_0::ArmnnDriverImpl::getCapabilities(m_Runtime, cb); + } + + Return getSupportedOperations(const V1_0::Model& model, + V1_0::IDevice::getSupportedOperations_cb cb) override + { + ALOGV("hal_1_3::ArmnnDriver::getSupportedOperations()"); + + return armnn_driver::ArmnnDriverImpl::getSupportedOperations(m_Runtime, + m_Options, + model, + cb); + } + + Return prepareModel(const V1_0::Model& model, + const android::sp& cb) override + { + ALOGV("hal_1_3::ArmnnDriver::prepareModel()"); + + return armnn_driver::ArmnnDriverImpl::prepareModel(m_Runtime, + m_ClTunedParameters, + m_Options, + model, + cb); + } + + Return getCapabilities_1_1(V1_1::IDevice::getCapabilities_1_1_cb cb) override + { + ALOGV("hal_1_3::ArmnnDriver::getCapabilities_1_1()"); + + return hal_1_1::ArmnnDriverImpl::getCapabilities_1_1(m_Runtime, cb); + } + + Return getSupportedOperations_1_1(const V1_1::Model& model, + V1_1::IDevice::getSupportedOperations_1_1_cb cb) override + { + ALOGV("hal_1_3::ArmnnDriver::getSupportedOperations_1_1()"); + return armnn_driver::ArmnnDriverImpl::getSupportedOperations(m_Runtime, + m_Options, + model, + cb); + } + + Return prepareModel_1_1(const V1_1::Model& model, + V1_1::ExecutionPreference preference, + const android::sp& cb) 
override + { + ALOGV("hal_1_3::ArmnnDriver::prepareModel_1_1()"); + + if (!(preference == ExecutionPreference::LOW_POWER || + preference == ExecutionPreference::FAST_SINGLE_ANSWER || + preference == ExecutionPreference::SUSTAINED_SPEED)) + { + ALOGV("hal_1_3::ArmnnDriver::prepareModel_1_1: Invalid execution preference"); + cb->notify(V1_0::ErrorStatus::INVALID_ARGUMENT, nullptr); + return V1_0::ErrorStatus::INVALID_ARGUMENT; + } + + return armnn_driver::ArmnnDriverImpl::prepareModel(m_Runtime, + m_ClTunedParameters, + m_Options, + model, + cb, + model.relaxComputationFloat32toFloat16 + && m_Options.GetFp16Enabled()); + } + + Return getCapabilities_1_2(getCapabilities_1_2_cb cb) + { + ALOGV("hal_1_3::ArmnnDriver::getCapabilities()"); + + return hal_1_2::ArmnnDriverImpl::getCapabilities_1_2(m_Runtime, cb); + } + + Return getSupportedOperations_1_2(const V1_2::Model& model, + getSupportedOperations_1_2_cb cb) + { + ALOGV("hal_1_3::ArmnnDriver::getSupportedOperations()"); + + return armnn_driver::ArmnnDriverImpl::getSupportedOperations(m_Runtime, + m_Options, + model, + cb); + } + + Return prepareModel_1_2(const V1_2::Model& model, V1_1::ExecutionPreference preference, + const android::hardware::hidl_vec&, + const android::hardware::hidl_vec&, const HidlToken&, + const android::sp& cb) + { + ALOGV("hal_1_3::ArmnnDriver::prepareModel_1_2()"); + + if (!(preference == ExecutionPreference::LOW_POWER || + preference == ExecutionPreference::FAST_SINGLE_ANSWER || + preference == ExecutionPreference::SUSTAINED_SPEED)) + { + ALOGV("hal_1_3::ArmnnDriver::prepareModel_1_2: Invalid execution preference"); + cb->notify(V1_0::ErrorStatus::INVALID_ARGUMENT, nullptr); + return V1_0::ErrorStatus::INVALID_ARGUMENT; + } + + return hal_1_2::ArmnnDriverImpl::prepareArmnnModel_1_2(m_Runtime, + m_ClTunedParameters, + m_Options, + model, + cb, + model.relaxComputationFloat32toFloat16 + && m_Options.GetFp16Enabled()); + } + + Return getCapabilities_1_3(getCapabilities_1_3_cb cb) + { + ALOGV("hal_1_3::ArmnnDriver::getCapabilities()"); + + return hal_1_3::ArmnnDriverImpl::getCapabilities_1_3(m_Runtime, cb); + } + + Return getSupportedOperations_1_3(const V1_3::Model& model, + getSupportedOperations_1_3_cb cb) + { + ALOGV("hal_1_3::ArmnnDriver::getSupportedOperations()"); + + return armnn_driver::ArmnnDriverImpl::getSupportedOperations(m_Runtime, + m_Options, + model, + cb); + } + + Return prepareModel_1_3(const V1_3::Model& model, + V1_1::ExecutionPreference preference, + V1_3::Priority priority, + const V1_3::OptionalTimePoint&, + const android::hardware::hidl_vec&, + const android::hardware::hidl_vec&, + const HidlToken&, + const android::sp& cb) + { + ALOGV("hal_1_3::ArmnnDriver::prepareModel_1_3()"); + + if (!(preference == ExecutionPreference::LOW_POWER || + preference == ExecutionPreference::FAST_SINGLE_ANSWER || + preference == ExecutionPreference::SUSTAINED_SPEED)) + { + ALOGV("hal_1_3::ArmnnDriver::prepareModel_1_3: Invalid execution preference"); + cb->notify_1_3(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr); + return V1_3::ErrorStatus::INVALID_ARGUMENT; + } + + if (!android::nn::validatePriority(priority)) { + cb->notify_1_3(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr); + return V1_3::ErrorStatus::INVALID_ARGUMENT; + } + + + return ArmnnDriverImpl::prepareArmnnModel_1_3(m_Runtime, + m_ClTunedParameters, + m_Options, + model, + cb, + model.relaxComputationFloat32toFloat16 + && m_Options.GetFp16Enabled()); + } + + Return getSupportedExtensions(getSupportedExtensions_cb cb) + { + 
ALOGV("hal_1_3::ArmnnDriver::getSupportedExtensions()"); + cb(V1_0::ErrorStatus::NONE, {/* No extensions. */}); + return Void(); + } + + Return getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb) + { + ALOGV("hal_1_3::ArmnnDriver::getSupportedExtensions()"); + + // Set both numbers to be 0 for cache not supported. + cb(V1_0::ErrorStatus::NONE, 0, 0); + return Void(); + } + + Return getStatus() override + { + ALOGV("hal_1_3::ArmnnDriver::getStatus()"); + + return armnn_driver::ArmnnDriverImpl::getStatus(); + } + + Return getVersionString(getVersionString_cb cb) + { + ALOGV("hal_1_3::ArmnnDriver::getVersionString()"); + + cb(V1_0::ErrorStatus::NONE, "ArmNN"); + return Void(); + } + + Return getType(getType_cb cb) + { + ALOGV("hal_1_3::ArmnnDriver::getType()"); + + cb(V1_0::ErrorStatus::NONE, V1_2::DeviceType::CPU); + return Void(); + } + + Return prepareModelFromCache( + const android::hardware::hidl_vec&, + const android::hardware::hidl_vec&, + const HidlToken&, + const sp& callback) + { + ALOGV("hal_1_3::ArmnnDriver::prepareModelFromCache()"); + callback->notify_1_2(V1_0::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_0::ErrorStatus::GENERAL_FAILURE; + } + + Return prepareModelFromCache_1_3( + V1_3::Priority, + const V1_3::OptionalTimePoint&, + const android::hardware::hidl_vec&, + const android::hardware::hidl_vec&, + const HidlToken&, + const sp& callback) + { + ALOGV("hal_1_3::ArmnnDriver::prepareModelFromCache()"); + callback->notify_1_3(ErrorStatus::GENERAL_FAILURE, nullptr); + return ErrorStatus::GENERAL_FAILURE; + } + + Return supportsDeadlines(supportsDeadlines_cb cb) { + // Set both numbers to be false for deadlines not supported. + cb(/*prepareModelDeadline=*/false, /*executionDeadline=*/false); + return Void(); + } + + Return allocate(const V1_3::BufferDesc& /*desc*/, + const hidl_vec>& /*preparedModels*/, + const hidl_vec& /*inputRoles*/, + const hidl_vec& /*outputRoles*/, + allocate_cb cb) { + ALOGV("hal_1_3::ArmnnDriver::allocate()"); + cb(ErrorStatus::GENERAL_FAILURE, nullptr, 0); + return Void(); + } + +}; + +} // namespace hal_1_3 +} // namespace armnn_driver diff --git a/1.3/ArmnnDriverImpl.cpp b/1.3/ArmnnDriverImpl.cpp new file mode 100644 index 00000000..98d038c9 --- /dev/null +++ b/1.3/ArmnnDriverImpl.cpp @@ -0,0 +1,338 @@ +// +// Copyright © 2020 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ArmnnDriverImpl.hpp" +#include "../ArmnnPreparedModel_1_3.hpp" +#include "../ModelToINetworkConverter.hpp" +#include "../SystemPropertiesUtils.hpp" + +#include + +namespace +{ + +const char *g_RelaxedFloat32toFloat16PerformanceExecTime = "ArmNN.relaxedFloat32toFloat16Performance.execTime"; +const char *g_RelaxedFloat32toFloat16PerformancePowerUsage = "ArmNN.relaxedFloat32toFloat16Performance.powerUsage"; + +const char *g_OperandTypeTensorFloat32PerformanceExecTime = "Armnn.operandTypeTensorFloat32Performance.execTime"; +const char *g_OperandTypeTensorFloat32PerformancePowerUsage = "Armnn.operandTypeTensorFloat32Performance.powerUsage"; + +const char *g_OperandTypeFloat32PerformanceExecTime = "Armnn.operandTypeFloat32Performance.execTime"; +const char *g_OperandTypeFloat32PerformancePowerUsage = "Armnn.operandTypeFloat32Performance.powerUsage"; + +const char *g_OperandTypeTensorFloat16PerformanceExecTime = "Armnn.operandTypeTensorFloat16Performance.execTime"; +const char *g_OperandTypeTensorFloat16PerformancePowerUsage = "Armnn.operandTypeTensorFloat16Performance.powerUsage"; + +const char *g_OperandTypeFloat16PerformanceExecTime = "Armnn.operandTypeFloat16Performance.execTime"; +const char *g_OperandTypeFloat16PerformancePowerUsage = "Armnn.operandTypeFloat16Performance.powerUsage"; + +const char *g_OperandTypeTensorQuant8AsymmPerformanceExecTime = + "Armnn.operandTypeTensorQuant8AsymmPerformance.execTime"; +const char *g_OperandTypeTensorQuant8AsymmPerformancePowerUsage = + "Armnn.operandTypeTensorQuant8AsymmPerformance.powerUsage"; + +const char *g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime = + "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.execTime"; +const char *g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage = + "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.powerUsage"; + +const char *g_OperandTypeTensorQuant16SymmPerformanceExecTime = + "Armnn.operandTypeTensorQuant16SymmPerformance.execTime"; +const char *g_OperandTypeTensorQuant16SymmPerformancePowerUsage = + "Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage"; + +const char *g_OperandTypeTensorQuant8SymmPerformanceExecTime = + "Armnn.operandTypeTensorQuant8SymmPerformance.execTime"; +const char *g_OperandTypeTensorQuant8SymmPerformancePowerUsage = + "Armnn.operandTypeTensorQuant8SymmPerformance.powerUsage"; + +const char *g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime = + "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.execTime"; +const char *g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage = + "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.powerUsage"; + + +const char *g_OperandTypeTensorInt32PerformanceExecTime = "Armnn.operandTypeTensorInt32Performance.execTime"; +const char *g_OperandTypeTensorInt32PerformancePowerUsage = "Armnn.operandTypeTensorInt32Performance.powerUsage"; + +const char *g_OperandTypeInt32PerformanceExecTime = "Armnn.operandTypeInt32Performance.execTime"; +const char *g_OperandTypeInt32PerformancePowerUsage = "Armnn.operandTypeInt32Performance.powerUsage"; + + +void NotifyCallbackAndCheck(const sp& callback, + V1_3::ErrorStatus errorStatus, + const sp& preparedModelPtr) +{ + Return returned = callback->notify_1_3(errorStatus, preparedModelPtr); + // This check is required, if the callback fails and it isn't checked it will bring down the service + if (!returned.isOk()) + { + ALOGE("ArmnnDriverImpl::prepareModel: hidl callback failed to return properly: %s ", + 
returned.description().c_str()); + } +} + +Return FailPrepareModel(V1_3::ErrorStatus error, + const std::string& message, + const sp& callback) +{ + ALOGW("ArmnnDriverImpl::prepareModel: %s", message.c_str()); + NotifyCallbackAndCheck(callback, error, nullptr); + return error; +} + +} // anonymous namespace + +namespace armnn_driver +{ +namespace hal_1_3 +{ + +Return ArmnnDriverImpl::prepareArmnnModel_1_3( + const armnn::IRuntimePtr& runtime, + const armnn::IGpuAccTunedParametersPtr& clTunedParameters, + const DriverOptions& options, + const V1_3::Model& model, + const sp& cb, + bool float32ToFloat16) +{ + ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_3()"); + + if (cb.get() == nullptr) + { + ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel"); + return V1_3::ErrorStatus::INVALID_ARGUMENT; + } + + if (!runtime) + { + return FailPrepareModel(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb); + } + + if (!android::nn::validateModel(model)) + { + return FailPrepareModel(V1_3::ErrorStatus::INVALID_ARGUMENT, "Invalid model passed as input", cb); + } + + // Deliberately ignore any unsupported operations requested by the options - + // at this point we're being asked to prepare a model that we've already declared support for + // and the operation indices may be different to those in getSupportedOperations anyway. + std::set unsupportedOperations; + ModelToINetworkConverter modelConverter(options.GetBackends(), + model, + unsupportedOperations); + + if (modelConverter.GetConversionResult() != ConversionResult::Success) + { + FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb); + return V1_3::ErrorStatus::NONE; + } + + // Optimize the network + armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr); + armnn::OptimizerOptions OptOptions; + OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16; + + std::vector errMessages; + try + { + optNet = armnn::Optimize(*modelConverter.GetINetwork(), + options.GetBackends(), + runtime->GetDeviceSpec(), + OptOptions, + errMessages); + } + catch (std::exception& e) + { + std::stringstream message; + message << "Exception (" << e.what() << ") caught from optimize."; + FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb); + return V1_3::ErrorStatus::NONE; + } + + // Check that the optimized network is valid. + if (!optNet) + { + std::stringstream message; + message << "Invalid optimized network"; + for (const std::string& msg : errMessages) + { + message << "\n" << msg; + } + FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb); + return V1_3::ErrorStatus::NONE; + } + + // Export the optimized network graph to a dot file if an output dump directory + // has been specified in the drivers' arguments. + std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet, + options.GetRequestInputsAndOutputsDumpDir()); + + // Load it into the runtime. 
+ armnn::NetworkId netId = 0; + try + { + if (runtime->LoadNetwork(netId, move(optNet)) != armnn::Status::Success) + { + return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb); + } + } + catch (std::exception& e) + { + std::stringstream message; + message << "Exception (" << e.what()<< ") caught from LoadNetwork."; + FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb); + return V1_3::ErrorStatus::NONE; + } + + // Now that we have a networkId for the graph rename the dump file to use it + // so that we can associate the graph file and the input/output tensor dump files + RenameGraphDotFile(dotGraphFileName, + options.GetRequestInputsAndOutputsDumpDir(), + netId); + + std::unique_ptr> preparedModel( + new ArmnnPreparedModel_1_3( + netId, + runtime.get(), + model, + options.GetRequestInputsAndOutputsDumpDir(), + options.IsGpuProfilingEnabled())); + + // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if + // this is enabled) before the first 'real' inference which removes the overhead of the first inference. + if (!preparedModel->ExecuteWithDummyInputs()) + { + return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb); + } + + if (clTunedParameters && + options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters) + { + // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file. + try + { + clTunedParameters->Save(options.GetClTunedParametersFile().c_str()); + } + catch (std::exception& error) + { + ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s", + options.GetClTunedParametersFile().c_str(), error.what()); + } + } + + NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release()); + + return V1_3::ErrorStatus::NONE; +} + +Return ArmnnDriverImpl::getCapabilities_1_3(const armnn::IRuntimePtr& runtime, + V1_3::IDevice::getCapabilities_1_3_cb cb) +{ + ALOGV("hal_1_3::ArmnnDriverImpl::getCapabilities()"); + + V1_3::Capabilities capabilities; + + float defaultValue = .1f; + + if (runtime) + { + capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime = + ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue); + + capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage = + ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue); + + // Set the base value for all operand types + capabilities.operandPerformance = nonExtensionOperandPerformance({FLT_MAX, FLT_MAX}); + + // Load supported operand types + update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT32, + { + .execTime = ParseSystemProperty(g_OperandTypeTensorFloat32PerformanceExecTime, defaultValue), + .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat32PerformancePowerUsage, defaultValue) + }); + + update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT32, + { + .execTime = ParseSystemProperty(g_OperandTypeFloat32PerformanceExecTime, defaultValue), + .powerUsage = ParseSystemProperty(g_OperandTypeFloat32PerformancePowerUsage, defaultValue) + }); + + update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT16, + { + .execTime = ParseSystemProperty(g_OperandTypeTensorFloat16PerformanceExecTime, defaultValue), + .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat16PerformancePowerUsage, defaultValue) + }); + + 
update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT16, + { + .execTime = ParseSystemProperty(g_OperandTypeFloat16PerformanceExecTime, defaultValue), + .powerUsage = ParseSystemProperty(g_OperandTypeFloat16PerformancePowerUsage, defaultValue) + }); + + update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM, + { + .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformanceExecTime, defaultValue), + .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformancePowerUsage, defaultValue) + }); + + update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM, + { + .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformanceExecTime, defaultValue), + .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformancePowerUsage, defaultValue) + }); + update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM_SIGNED, + { + .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime, + defaultValue), + .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage, + defaultValue) + }); + + update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT16_SYMM, + { + .execTime = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformanceExecTime, defaultValue), + .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformancePowerUsage, defaultValue) + }); + + update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL, + { + .execTime = + ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime, defaultValue), + .powerUsage = + ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage, defaultValue) + }); + + update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_INT32, + { + .execTime = ParseSystemProperty(g_OperandTypeTensorInt32PerformanceExecTime, defaultValue), + .powerUsage = ParseSystemProperty(g_OperandTypeTensorInt32PerformancePowerUsage, defaultValue) + }); + + update(&capabilities.operandPerformance, V1_3::OperandType::INT32, + { + .execTime = ParseSystemProperty(g_OperandTypeInt32PerformanceExecTime, defaultValue), + .powerUsage = ParseSystemProperty(g_OperandTypeInt32PerformancePowerUsage, defaultValue) + }); + + cb(V1_3::ErrorStatus::NONE, capabilities); + } + else + { + capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime = 0; + capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime = 0; + + // Set the base value for all operand types + capabilities.operandPerformance = nonExtensionOperandPerformance({0.f, 0.0f}); + + cb(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, capabilities); + } + + return Void(); +} + +} // namespace hal_1_3 +} // namespace armnn_driver \ No newline at end of file diff --git a/1.3/ArmnnDriverImpl.hpp b/1.3/ArmnnDriverImpl.hpp new file mode 100644 index 00000000..8a665ea5 --- /dev/null +++ b/1.3/ArmnnDriverImpl.hpp @@ -0,0 +1,40 @@ +// +// Copyright © 2020 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include "../DriverOptions.hpp" + +#include + +using namespace android::nn::hal; + +namespace V1_0 = ::android::hardware::neuralnetworks::V1_0; +namespace V1_2 = ::android::hardware::neuralnetworks::V1_2; +namespace V1_3 = ::android::hardware::neuralnetworks::V1_3; + +namespace armnn_driver +{ +namespace hal_1_3 +{ + +class ArmnnDriverImpl +{ +public: + static Return prepareArmnnModel_1_3(const armnn::IRuntimePtr& runtime, + const armnn::IGpuAccTunedParametersPtr& clTunedParameters, + const DriverOptions& options, + const V1_3::Model& model, + const android::sp& cb, + bool float32ToFloat16 = false); + + static Return getCapabilities_1_3(const armnn::IRuntimePtr& runtime, + V1_3::IDevice::getCapabilities_1_3_cb cb); +}; + +} // namespace hal_1_3 +} // namespace armnn_driver \ No newline at end of file diff --git a/1.3/HalPolicy.cpp b/1.3/HalPolicy.cpp new file mode 100644 index 00000000..0de7573a --- /dev/null +++ b/1.3/HalPolicy.cpp @@ -0,0 +1,451 @@ +// +// Copyright © 2020 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "HalPolicy.hpp" + +namespace armnn_driver +{ +namespace hal_1_3 +{ + +using namespace armnn; + +namespace +{ + +} // anonymous namespace + +bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model, ConversionData& data) +{ + switch (operation.type) + { + case V1_3::OperationType::ABS: + return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Abs); + case V1_3::OperationType::ADD: + return ConvertAdd(operation, model, data); + case V1_3::OperationType::ARGMAX: + return ConvertArgMinMax(operation, model, data, ArgMinMaxFunction::Max); + case V1_3::OperationType::ARGMIN: + return ConvertArgMinMax(operation, model, data, ArgMinMaxFunction::Min); + case V1_3::OperationType::AVERAGE_POOL_2D: + return ConvertAveragePool2d(operation, model, data); + case V1_3::OperationType::BATCH_TO_SPACE_ND: + return ConvertBatchToSpaceNd(operation, model, data); + case V1_3::OperationType::CONCATENATION: + return ConvertConcatenation(operation, model, data); + case V1_3::OperationType::CONV_2D: + return ConvertConv2d(operation, model, data); + case V1_3::OperationType::DEPTH_TO_SPACE: + return ConvertDepthToSpace(operation, model, data); + case V1_3::OperationType::DEPTHWISE_CONV_2D: + return ConvertDepthwiseConv2d(operation, model, data); + case V1_3::OperationType::DEQUANTIZE: + return ConvertDequantize(operation, model, data); + case V1_3::OperationType::DIV: + return ConvertDiv(operation, model, data); + case V1_3::OperationType::EQUAL: + return ConvertComparison(operation, model, data, ComparisonOperation::Equal); + case V1_3::OperationType::EXPAND_DIMS: + return ConvertExpandDims(operation, model, data); + case V1_3::OperationType::FLOOR: + return ConvertFloor(operation, model, data); + case V1_3::OperationType::FULLY_CONNECTED: + return ConvertFullyConnected(operation, model, data); + case V1_3::OperationType::GREATER: + return ConvertComparison(operation, model, data, ComparisonOperation::Greater); + case V1_3::OperationType::GREATER_EQUAL: + return ConvertComparison(operation, model, data, ComparisonOperation::GreaterOrEqual); + case V1_3::OperationType::GROUPED_CONV_2D: + return ConvertGroupedConv2d(operation, model, data); + case V1_3::OperationType::INSTANCE_NORMALIZATION: + return ConvertInstanceNormalization(operation, model, data); + case V1_3::OperationType::L2_NORMALIZATION: + return ConvertL2Normalization(operation, model, data); + case
V1_3::OperationType::L2_POOL_2D: + return ConvertL2Pool2d(operation, model, data); + case V1_3::OperationType::LESS: + return ConvertComparison(operation, model, data, ComparisonOperation::Less); + case V1_3::OperationType::LESS_EQUAL: + return ConvertComparison(operation, model, data, ComparisonOperation::LessOrEqual); + case V1_3::OperationType::LOCAL_RESPONSE_NORMALIZATION: + return ConvertLocalResponseNormalization(operation, model, data); + case V1_3::OperationType::LOGISTIC: + return ConvertLogistic(operation, model, data); + case V1_3::OperationType::LOG_SOFTMAX: + return ConvertLogSoftmax(operation, model, data); + case V1_3::OperationType::LSTM: + return ConvertLstm(operation, model, data); + case V1_3::OperationType::MAX_POOL_2D: + return ConvertMaxPool2d(operation, model, data); + case V1_3::OperationType::MAXIMUM: + return ConvertMaximum(operation, model, data); + case V1_3::OperationType::MEAN: + return ConvertMean(operation, model, data); + case V1_3::OperationType::MINIMUM: + return ConvertMinimum(operation, model, data); + case V1_3::OperationType::MUL: + return ConvertMul(operation, model, data); + case V1_3::OperationType::NEG: + return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Neg); + case V1_3::OperationType::NOT_EQUAL: + return ConvertComparison(operation, model, data, ComparisonOperation::NotEqual); + case V1_3::OperationType::PAD: + return ConvertPad(operation, model, data); + case V1_3::OperationType::PAD_V2: + return ConvertPadV2(operation, model, data); + case V1_3::OperationType::PRELU: + return ConvertPrelu(operation, model, data); + case V1_3::OperationType::QUANTIZE: + return ConvertQuantize(operation, model, data); + case V1_3::OperationType::QUANTIZED_16BIT_LSTM: + return ConvertQuantizedLstm(operation, model, data); + case V1_3::OperationType::RELU: + return ConvertReLu(operation, model, data); + case V1_3::OperationType::RELU1: + return ConvertReLu1(operation, model, data); + case V1_3::OperationType::RELU6: + return ConvertReLu6(operation, model, data); + case V1_3::OperationType::RESHAPE: + return ConvertReshape(operation, model, data); + case V1_3::OperationType::RESIZE_BILINEAR: + return ConvertResize(operation, model, data, ResizeMethod::Bilinear); + case V1_3::OperationType::RESIZE_NEAREST_NEIGHBOR: + return ConvertResize(operation, model, data, ResizeMethod::NearestNeighbor); + case V1_3::OperationType::RSQRT: + return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Rsqrt); + case V1_3::OperationType::SQRT: + return ConvertSqrt(operation, model, data); + case V1_3::OperationType::SQUEEZE: + return ConvertSqueeze(operation, model, data); + case V1_3::OperationType::STRIDED_SLICE: + return ConvertStridedSlice(operation, model, data); + case V1_3::OperationType::TRANSPOSE: + return ConvertTranspose(operation, model, data); + case V1_3::OperationType::TRANSPOSE_CONV_2D: + return ConvertTransposeConv2d(operation, model, data); + case V1_3::OperationType::SOFTMAX: + return ConvertSoftmax(operation, model, data); + case V1_3::OperationType::SPACE_TO_BATCH_ND : + return ConvertSpaceToBatchNd(operation, model, data); + case V1_3::OperationType::SPACE_TO_DEPTH: + return ConvertSpaceToDepth(operation, model, data); + case V1_3::OperationType::SUB: + return ConvertSub(operation, model, data); + case V1_3::OperationType::TANH: + return ConvertTanH(operation, model, data); + default: + return Fail("%s: Operation type %s not supported in ArmnnDriver", + __func__, toString(operation.type).c_str()); + } +} + +bool 
HalPolicy::ConvertAdd(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertAdd()"); + return ::ConvertAdd(operation, model, data); +} + +bool HalPolicy::ConvertArgMinMax(const V1_3::Operation& operation, + const V1_3::Model& model, + ConversionData& data, + armnn::ArgMinMaxFunction argMinMaxFunction) +{ + ALOGV("hal_1_3::HalPolicy::ConvertArgMinMax()"); + return ::ConvertArgMinMax(operation, model, data, argMinMaxFunction); +} + +bool HalPolicy::ConvertAveragePool2d(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertAveragePool2d()"); + return ConvertPooling2d(operation, __func__, PoolingAlgorithm::Average, model, data); +} + +bool HalPolicy::ConvertBatchToSpaceNd(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertBatchToSpaceNd()"); + return ::ConvertBatchToSpaceNd(operation, model, data); +} + +bool HalPolicy::ConvertComparison(const Operation& operation, + const Model& model, + ConversionData& data, + ComparisonOperation comparisonOperation) +{ + ALOGV("hal_1_3::HalPolicy::ConvertComparison()"); + return ::ConvertComparison_1_2(operation, model, data, comparisonOperation); +} + + +bool HalPolicy::ConvertConcatenation(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertConcatenation()"); + return ::ConvertConcatenation(operation, model, data); +} + +bool HalPolicy::ConvertConv2d(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertConv2d()"); + return ::ConvertConv2d_1_2(operation, model, data); +} + +bool HalPolicy::ConvertDepthToSpace(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertDepthToSpace()"); + return ::ConvertDepthToSpace(operation, model, data); +} + +bool HalPolicy::ConvertDepthwiseConv2d(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertDepthwiseConv2d()"); + return ::ConvertDepthwiseConv2d_1_2(operation, model, data); +} + +bool HalPolicy::ConvertDequantize(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertDequantize()"); + return ::ConvertDequantize_1_2(operation, model, data); +} + +bool HalPolicy::ConvertDiv(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertDiv()"); + return ::ConvertDiv(operation, model, data); +} + +bool HalPolicy::ConvertElementwiseUnary(const Operation& operation, + const Model& model, + ConversionData& data, + UnaryOperation unaryOperation) +{ + ALOGV("hal_1_3::HalPolicy::ConvertElementwiseUnary()"); + return ::ConvertElementwiseUnary(operation, model, data, unaryOperation); +} + +bool HalPolicy::ConvertExpandDims(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertExpandDims()"); + return ::ConvertExpandDims(operation, model, data); +} + +bool HalPolicy::ConvertFloor(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertFloor()"); + return ::ConvertFloor(operation, model, data); +} + +bool HalPolicy::ConvertFullyConnected(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertFullyConnected()"); + return ::ConvertFullyConnected(operation, model, data); +} + +bool 
HalPolicy::ConvertGroupedConv2d(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertGroupedConv2d()"); + return ::ConvertGroupedConv2d(operation, model, data); +} + +bool HalPolicy::ConvertInstanceNormalization(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertInstanceNormalization()"); + return ::ConvertInstanceNormalization(operation, model, data); +} + +bool HalPolicy::ConvertL2Normalization(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertL2Normalization()"); + return ::ConvertL2Normalization(operation, model, data); +} + +bool HalPolicy::ConvertL2Pool2d(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertL2Pool2d()"); + return ConvertPooling2d(operation, __func__, PoolingAlgorithm::L2, model, data); +} + +bool HalPolicy::ConvertLocalResponseNormalization(const Operation& operation, + const Model& model, + ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertLocalResponseNormalization()"); + return ::ConvertLocalResponseNormalization(operation, model, data); +} + +bool HalPolicy::ConvertLogistic(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertLogistic()"); + return ::ConvertLogistic(operation, model, data); +} + +bool HalPolicy::ConvertLogSoftmax(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertLogSoftmax()"); + return ::ConvertLogSoftmax(operation, model, data); +} + +bool HalPolicy::ConvertLstm(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertLstm()"); + return ::ConvertLstm(operation, model, data); +} + +bool HalPolicy::ConvertMaxPool2d(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertMaxPool2d()"); + return ConvertPooling2d(operation, __func__, PoolingAlgorithm::Max, model, data); +} + +bool HalPolicy::ConvertMaximum(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertMaximum()"); + return ::ConvertMaximum(operation, model, data); +} + +bool HalPolicy::ConvertMean(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertMean()"); + return ::ConvertMean(operation, model, data); +} + +bool HalPolicy::ConvertMinimum(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertMinimum()"); + return ::ConvertMinimum(operation, model, data); +} + +bool HalPolicy::ConvertMul(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertMul()"); + return ::ConvertMul(operation, model, data); +} + +bool HalPolicy::ConvertPad(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertPad()"); + return ::ConvertPad(operation, model, data); +} + +bool HalPolicy::ConvertPadV2(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertPadV2()"); + return ::ConvertPadV2(operation, model, data); +} + +bool HalPolicy::ConvertPrelu(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertPrelu()"); + return ::ConvertPrelu(operation, model, data); +} + +bool
HalPolicy::ConvertQuantize(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertQuantize()"); + return ::ConvertQuantize(operation, model, data); +} + +bool HalPolicy::ConvertQuantizedLstm(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertQuantizedLstm()"); + return ::ConvertQuantizedLstm(operation, model, data); +} + +bool HalPolicy::ConvertReLu(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertReLu()"); + return ::ConvertReLu(operation, model, data); +} + +bool HalPolicy::ConvertReLu1(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertReLu1()"); + return ::ConvertReLu1(operation, model, data); +} + +bool HalPolicy::ConvertReLu6(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertReLu6()"); + return ::ConvertReLu6(operation, model, data); +} + +bool HalPolicy::ConvertReshape(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertReshape()"); + return ::ConvertReshape(operation, model, data); +} + +bool HalPolicy::ConvertResize(const Operation& operation, + const Model& model, + ConversionData& data, + ResizeMethod resizeMethod) +{ + ALOGV("hal_1_3::HalPolicy::ConvertResize()"); + return ::ConvertResize(operation, model, data, resizeMethod); +} + +bool HalPolicy::ConvertSpaceToBatchNd(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertSpaceToBatchNd()"); + return ::ConvertSpaceToBatchNd(operation, model, data); +} + +bool HalPolicy::ConvertSpaceToDepth(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertSpaceToDepth()"); + return ::ConvertSpaceToDepth(operation, model, data); +} + +bool HalPolicy::ConvertSoftmax(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertSoftmax()"); + return ::ConvertSoftmax(operation, model, data); +} + +bool HalPolicy::ConvertSub(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertSub()"); + return ::ConvertSub(operation, model, data); +} + +bool HalPolicy::ConvertTanH(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertTanH()"); + return ::ConvertTanH(operation, model, data); +} + +bool HalPolicy::ConvertTransposeConv2d(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertTransposeConv2d()"); + return ::ConvertTransposeConv2d(operation, model, data); +} + +bool HalPolicy::ConvertSqrt(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertSqrt()"); + ActivationDescriptor desc; + desc.m_Function = ActivationFunction::Sqrt; + + return ::ConvertToActivation(operation, __func__, desc, model, data); +} + +bool HalPolicy::ConvertSqueeze(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertSqueeze()"); + return ::ConvertSqueeze(operation, model, data); +} + +bool HalPolicy::ConvertStridedSlice(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertStridedSlice()"); + return ::ConvertStridedSlice(operation, model, data); +} + +bool 
HalPolicy::ConvertTranspose(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_3::HalPolicy::ConvertTranspose()"); + return ::ConvertTranspose(operation, model, data); +} + +} // namespace hal_1_3 +} // namespace armnn_driver diff --git a/1.3/HalPolicy.hpp b/1.3/HalPolicy.hpp new file mode 100644 index 00000000..f7771a6c --- /dev/null +++ b/1.3/HalPolicy.hpp @@ -0,0 +1,150 @@ +// +// Copyright © 2020 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "../ConversionUtils.hpp" +#include "../ConversionUtils_1_2.hpp" + +#include + +#include + +namespace V1_3 = ::android::hardware::neuralnetworks::V1_3; + +namespace armnn_driver +{ +namespace hal_1_3 +{ + +class HalPolicy +{ +public: + using Model = V1_3::Model; + using Operand = V1_3::Operand; + using OperandLifeTime = V1_3::OperandLifeTime; + using OperandType = V1_3::OperandType; + using Operation = V1_3::Operation; + using OperationType = V1_3::OperationType; + using ExecutionCallback = V1_3::IExecutionCallback; + using getSupportedOperations_cb = V1_3::IDevice::getSupportedOperations_1_3_cb; + using ErrorStatus = V1_3::ErrorStatus; + + static bool ConvertOperation(const Operation& operation, const Model& model, ConversionData& data); + +private: + static bool ConvertAdd(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertArgMinMax(const Operation& operation, + const Model& model, + ConversionData& data, + armnn::ArgMinMaxFunction argMinMaxFunction); + + static bool ConvertAveragePool2d(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertBatchToSpaceNd(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertComparison(const Operation& operation, + const Model& model, + ConversionData& data, + armnn::ComparisonOperation comparisonOperation); + + static bool ConvertConcatenation(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertConv2d(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertDepthToSpace(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertDepthwiseConv2d(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertDequantize(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertDiv(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertElementwiseUnary(const Operation& operation, + const Model& model, + ConversionData& data, + armnn::UnaryOperation unaryOperation); + + static bool ConvertExpandDims(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertFloor(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertFullyConnected(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertGroupedConv2d(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertInstanceNormalization(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertL2Normalization(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertL2Pool2d(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertLocalResponseNormalization(const Operation& operation, + const Model& 
model, + ConversionData& data); + + static bool ConvertLogistic(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertLogSoftmax(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertLstm(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertMaxPool2d(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertMaximum(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertMean(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertMinimum(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertMul(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertPad(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertPadV2(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertPrelu(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertQuantize(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertQuantizedLstm(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertReLu(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertReLu1(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertReLu6(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertReshape(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertResize(const Operation& operation, + const Model& model, + ConversionData& data, + armnn::ResizeMethod resizeMethod); + + static bool ConvertSoftmax(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertSpaceToBatchNd(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertSpaceToDepth(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertSqrt(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertSqueeze(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertStridedSlice(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertSub(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertTanH(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertTranspose(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertTransposeConv2d(const Operation& operation, const Model& model, ConversionData& data); +}; + +} // namespace hal_1_3 +} // namespace armnn_driver diff --git a/Android.mk b/Android.mk index 6cc85eec..bac6db17 100644 --- a/Android.mk +++ b/Android.mk @@ -427,6 +427,114 @@ include $(BUILD_STATIC_LIBRARY) endif # PLATFORM_VERSION == Q +ifeq ($(R_OR_LATER),1) +# The following target is available starting from Android R + +####################### +# libarmnn-driver@1.3 # +####################### +include $(CLEAR_VARS) + +LOCAL_MODULE := libarmnn-driver@1.3 +LOCAL_MODULE_TAGS := optional +LOCAL_ARM_MODE := arm +LOCAL_PROPRIETARY_MODULE := true +# Mark source files as dependent on Android.mk 
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk + +LOCAL_C_INCLUDES := \ + $(ARMNN_HEADER_PATH) \ + $(ARMNN_UTILS_HEADER_PATH) \ + $(OPENCL_HEADER_PATH) \ + $(NN_HEADER_PATH) + +LOCAL_CFLAGS := \ + -std=$(CPP_VERSION) \ + -fexceptions \ + -Werror \ + -Wno-format-security \ + -DBOOST_NO_AUTO_PTR \ + -DARMNN_ANDROID_NN_V1_3 \ + -DARMNN_ANDROID_R + +ifeq ($(ARMNN_DRIVER_DEBUG),1) +LOCAL_CFLAGS+= \ + -UNDEBUG +endif # ARMNN_DRIVER_DEBUG == 1 + +ifeq ($(ARMNN_COMPUTE_CL_ENABLED),1) +LOCAL_CFLAGS += \ + -DARMCOMPUTECL_ENABLED +endif # ARMNN_COMPUTE_CL_ENABLED == 1 + +ifeq ($(ARMNN_COMPUTE_NEON_ENABLED),1) +LOCAL_CFLAGS += \ + -DARMCOMPUTENEON_ENABLED +endif # ARMNN_COMPUTE_NEON_ENABLED == 1 + +ifeq ($(ARMNN_REF_ENABLED),1) +LOCAL_CFLAGS += \ + -DARMNNREF_ENABLED +endif # ARMNN_REF_ENABLED == 1 + +LOCAL_SRC_FILES := \ + 1.0/ArmnnDriverImpl.cpp \ + 1.0/HalPolicy.cpp \ + 1.1/ArmnnDriverImpl.cpp \ + 1.1/HalPolicy.cpp \ + 1.2/ArmnnDriverImpl.cpp \ + 1.2/HalPolicy.cpp \ + 1.3/ArmnnDriverImpl.cpp \ + 1.3/HalPolicy.cpp \ + ArmnnDevice.cpp \ + ArmnnDriverImpl.cpp \ + ArmnnPreparedModel.cpp \ + ArmnnPreparedModel_1_2.cpp \ + ArmnnPreparedModel_1_3.cpp \ + ConversionUtils.cpp \ + DriverOptions.cpp \ + ModelToINetworkConverter.cpp \ + RequestThread.cpp \ + Utils.cpp + +LOCAL_STATIC_LIBRARIES := \ + libneuralnetworks_common \ + libboost_log \ + libboost_program_options \ + libboost_system \ + libboost_thread \ + libboost_filesystem \ + arm_compute_library + +LOCAL_WHOLE_STATIC_LIBRARIES := libarmnn + +LOCAL_SHARED_LIBRARIES := \ + libbase \ + libhidlbase \ + libhidltransport \ + libhidlmemory \ + liblog \ + libutils \ + libnativewindow \ + libui \ + libfmq \ + libcutils \ + android.hidl.allocator@1.0 \ + android.hidl.memory@1.0 \ + android.hardware.neuralnetworks@1.0 \ + android.hardware.neuralnetworks@1.1 \ + android.hardware.neuralnetworks@1.2 \ + android.hardware.neuralnetworks@1.3 + +ifeq ($(ARMNN_COMPUTE_CL_ENABLED),1) +LOCAL_SHARED_LIBRARIES+= \ + libOpenCL +endif + +include $(BUILD_STATIC_LIBRARY) + +endif # PLATFORM_VERSION == R + ##################################################### # android.hardware.neuralnetworks@1.0-service-armnn # ##################################################### @@ -714,6 +822,84 @@ include $(BUILD_EXECUTABLE) endif # PLATFORM_VERSION == Q +ifeq ($(R_OR_LATER),1) +# The following target is available starting from Android R + +##################################################### +# android.hardware.neuralnetworks@1.3-service-armnn # +##################################################### +include $(CLEAR_VARS) + +LOCAL_MODULE := android.hardware.neuralnetworks@1.3-service-armnn +LOCAL_INIT_RC := android.hardware.neuralnetworks@1.3-service-armnn.rc +LOCAL_MODULE_TAGS := optional +LOCAL_ARM_MODE := arm +LOCAL_MODULE_RELATIVE_PATH := hw +LOCAL_PROPRIETARY_MODULE := true +# Mark source files as dependent on Android.mk +LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk + +LOCAL_C_INCLUDES := \ + $(ARMNN_HEADER_PATH) \ + $(NN_HEADER_PATH) + +LOCAL_CFLAGS := \ + -std=$(CPP_VERSION) \ + -fexceptions \ + -DARMNN_ANDROID_NN_V1_3 \ + -DBOOST_NO_AUTO_PTR \ + -DARMNN_ANDROID_R + +ifeq ($(ARMNN_DRIVER_DEBUG),1) +LOCAL_CFLAGS += \ + -UNDEBUG +endif # ARMNN_DRIVER_DEBUG == 1 + +LOCAL_SRC_FILES := \ + service.cpp + +LOCAL_STATIC_LIBRARIES := \ + libneuralnetworks_common \ + libboost_log \ + libboost_program_options \ + libboost_system \ + libboost_thread \ + libboost_filesystem \ + arm_compute_library + +LOCAL_WHOLE_STATIC_LIBRARIES := \ + libarmnn-driver@1.3 + 
+LOCAL_SHARED_LIBRARIES := \ + libbase \ + libhidlbase \ + libhidltransport \ + libhidlmemory \ + libdl \ + libhardware \ + liblog \ + libtextclassifier_hash \ + libutils \ + libnativewindow \ + libui \ + libfmq \ + libcutils \ + android.hidl.allocator@1.0 \ + android.hidl.memory@1.0 \ + android.hardware.neuralnetworks@1.0 \ + android.hardware.neuralnetworks@1.1 \ + android.hardware.neuralnetworks@1.2 \ + android.hardware.neuralnetworks@1.3 + +ifeq ($(ARMNN_COMPUTE_CL_ENABLED),1) +LOCAL_SHARED_LIBRARIES+= \ + libOpenCL +endif + +include $(BUILD_EXECUTABLE) + +endif # PLATFORM_VERSION == R + ########################## # armnn module and tests # ########################## diff --git a/ArmnnDriver.hpp b/ArmnnDriver.hpp index d961f861..a6fd9b2c 100644 --- a/ArmnnDriver.hpp +++ b/ArmnnDriver.hpp @@ -9,7 +9,28 @@ #include -#ifdef ARMNN_ANDROID_NN_V1_2 // Using ::android::hardware::neuralnetworks::V1_2 +#ifdef ARMNN_ANDROID_NN_V1_3 // Using ::android::hardware::neuralnetworks::V1_3 + +#include "1.1/ArmnnDriver.hpp" +#include "1.2/ArmnnDriver.hpp" +#include "1.3/ArmnnDriver.hpp" + +namespace armnn_driver +{ + +class ArmnnDriver : public hal_1_3::ArmnnDriver +{ +public: + ArmnnDriver(DriverOptions options) + : hal_1_3::ArmnnDriver(std::move(options)) + { + ALOGV("ArmnnDriver::ArmnnDriver()"); + } + ~ArmnnDriver() {} +}; + +} // namespace armnn_driver +#elif ARMNN_ANDROID_NN_V1_2 // Using ::android::hardware::neuralnetworks::V1_2 #include "1.1/ArmnnDriver.hpp" #include "1.2/ArmnnDriver.hpp" diff --git a/ArmnnDriverImpl.cpp b/ArmnnDriverImpl.cpp index eab95989..9c6d51fd 100644 --- a/ArmnnDriverImpl.cpp +++ b/ArmnnDriverImpl.cpp @@ -8,10 +8,16 @@ #include "ArmnnDriverImpl.hpp" #include "ArmnnPreparedModel.hpp" -#ifdef ARMNN_ANDROID_NN_V1_2 // Using ::android::hardware::neuralnetworks::V1_2 +#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3) // Using ::android::hardware::neuralnetworks::V1_2 #include "ArmnnPreparedModel_1_2.hpp" #endif +#ifdef ARMNN_ANDROID_NN_V1_3 // Using ::android::hardware::neuralnetworks::V1_3 +#include "ArmnnPreparedModel_1_3.hpp" +#endif + +#include "Utils.hpp" + #include "ModelToINetworkConverter.hpp" #include "SystemPropertiesUtils.hpp" #include @@ -227,14 +233,14 @@ Return ArmnnDriverImpl::getSupportedOperations(const armnn::IRu if (!runtime) { - cb(V1_0::ErrorStatus::DEVICE_UNAVAILABLE, result); + cb(HalErrorStatus::DEVICE_UNAVAILABLE, result); return Void(); } // Run general model validation, if this doesn't pass we shouldn't analyse the model anyway. if (!android::nn::validateModel(model)) { - cb(V1_0::ErrorStatus::INVALID_ARGUMENT, result); + cb(HalErrorStatus::INVALID_ARGUMENT, result); return Void(); } @@ -246,20 +252,22 @@ Return ArmnnDriverImpl::getSupportedOperations(const armnn::IRu if (modelConverter.GetConversionResult() != ConversionResult::Success && modelConverter.GetConversionResult() != ConversionResult::UnsupportedFeature) { - cb(V1_0::ErrorStatus::GENERAL_FAILURE, result); + cb(HalErrorStatus::GENERAL_FAILURE, result); return Void(); } // Check each operation if it was converted successfully and copy the flags // into the result (vector) that we need to return to Android.
- result.reserve(model.operations.size()); - for (uint32_t operationIdx = 0; operationIdx < model.operations.size(); operationIdx++) + result.reserve(getMainModel(model).operations.size()); + for (uint32_t operationIdx = 0; + operationIdx < getMainModel(model).operations.size(); + ++operationIdx) { bool operationSupported = modelConverter.IsOperationSupported(operationIdx); result.push_back(operationSupported); } - cb(V1_0::ErrorStatus::NONE, result); + cb(HalErrorStatus::NONE, result); return Void(); } @@ -286,4 +294,10 @@ template class ArmnnDriverImpl; template class ArmnnDriverImpl; #endif +#ifdef ARMNN_ANDROID_NN_V1_3 +template class ArmnnDriverImpl; +template class ArmnnDriverImpl; +template class ArmnnDriverImpl; +#endif + } // namespace armnn_driver diff --git a/ArmnnDriverImpl.hpp b/ArmnnDriverImpl.hpp index dfaafb34..cdff9054 100644 --- a/ArmnnDriverImpl.hpp +++ b/ArmnnDriverImpl.hpp @@ -20,6 +20,11 @@ namespace V1_1 = ::android::hardware::neuralnetworks::V1_1; namespace V1_2 = ::android::hardware::neuralnetworks::V1_2; #endif +#ifdef ARMNN_ANDROID_NN_V1_3 // Using ::android::hardware::neuralnetworks::V1_3 +namespace V1_2 = ::android::hardware::neuralnetworks::V1_2; +namespace V1_3 = ::android::hardware::neuralnetworks::V1_3; +#endif + namespace armnn_driver { @@ -36,6 +41,7 @@ class ArmnnDriverImpl public: using HalModel = typename HalPolicy::Model; using HalGetSupportedOperations_cb = typename HalPolicy::getSupportedOperations_cb; + using HalErrorStatus = typename HalPolicy::ErrorStatus; static Return getSupportedOperations( const armnn::IRuntimePtr& runtime, diff --git a/ArmnnPreparedModel.cpp b/ArmnnPreparedModel.cpp index d095e419..f990d3bc 100644 --- a/ArmnnPreparedModel.cpp +++ b/ArmnnPreparedModel.cpp @@ -294,7 +294,7 @@ bool ArmnnPreparedModel::ExecuteWithDummyInputs() { std::vector> storage; armnn::InputTensors inputTensors; - for (unsigned int i = 0; i < m_Model.inputIndexes.size(); i++) + for (unsigned int i = 0; i < getMainModel(m_Model).inputIndexes.size(); i++) { const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i); storage.emplace_back(inputTensorInfo.GetNumBytes()); @@ -304,7 +304,7 @@ bool ArmnnPreparedModel::ExecuteWithDummyInputs() } armnn::OutputTensors outputTensors; - for (unsigned int i = 0; i < m_Model.outputIndexes.size(); i++) + for (unsigned int i = 0; i < getMainModel(m_Model).outputIndexes.size(); i++) { const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i); storage.emplace_back(outputTensorInfo.GetNumBytes()); @@ -349,4 +349,10 @@ template class ArmnnPreparedModel; template class ArmnnPreparedModel; template class ArmnnPreparedModel; #endif + +#ifdef ARMNN_ANDROID_NN_V1_3 +template class ArmnnPreparedModel; +template class ArmnnPreparedModel; +template class ArmnnPreparedModel; +#endif } // namespace armnn_driver diff --git a/ArmnnPreparedModel_1_2.cpp b/ArmnnPreparedModel_1_2.cpp index 5031c5ff..76ef4265 100644 --- a/ArmnnPreparedModel_1_2.cpp +++ b/ArmnnPreparedModel_1_2.cpp @@ -2,9 +2,6 @@ // Copyright © 2017 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // -// Note: the ArmnnBurstExecutorWithCache in this file is based on Android code -// under the Apache 2.0 license. See comment below for details. 
-// #define LOG_TAG "ArmnnDriver" @@ -215,27 +212,6 @@ Return ArmnnPreparedModel_1_2::execute_1_2( return Execute(request, measureTiming, cb); } -OutputShape ComputeShape(const armnn::TensorInfo& info) -{ - OutputShape shape; - - hidl_vec dimensions; - - armnn::TensorShape tensorShape = info.GetShape(); - const unsigned int numDims = tensorShape.GetNumDimensions(); - dimensions.resize(numDims); - - for (unsigned int outputIdx = 0u; outputIdx < numDims; ++outputIdx) - { - dimensions[outputIdx] = tensorShape[outputIdx]; - } - - shape.dimensions = dimensions; - shape.isSufficient = true; - - return shape; -} - template Return ArmnnPreparedModel_1_2::PrepareMemoryForInputs( armnn::InputTensors& inputs, @@ -348,27 +324,6 @@ Return ArmnnPreparedModel_1_2::PrepareMemoryForIO return V1_0::ErrorStatus::NONE; } -void CommitPools(std::vector<::android::nn::RunTimePoolInfo>& memPools) -{ - if (memPools.empty()) - { - return; - } - // Commit output buffers. - // Note that we update *all* pools, even if they aren't actually used as outputs - - // this is simpler and is what the CpuExecutor does. - for (auto& pool : memPools) - { - // Type android::nn::RunTimePoolInfo has changed between Android P & Q and Android R, where - // update() has been removed and flush() added. -#if defined(ARMNN_ANDROID_R) // Use the new Android implementation. - pool.flush(); -#else - pool.update(); -#endif - } -} - template Return ArmnnPreparedModel_1_2::executeSynchronously(const V1_0::Request& request, MeasureTiming measureTiming, @@ -514,7 +469,7 @@ bool ArmnnPreparedModel_1_2::ExecuteWithDummyInputs() { std::vector> storage; armnn::InputTensors inputTensors; - for (unsigned int i = 0; i < m_Model.inputIndexes.size(); i++) + for (unsigned int i = 0; i < getMainModel(m_Model).inputIndexes.size(); i++) { const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i); storage.emplace_back(inputTensorInfo.GetNumBytes()); @@ -524,7 +479,7 @@ bool ArmnnPreparedModel_1_2::ExecuteWithDummyInputs() } armnn::OutputTensors outputTensors; - for (unsigned int i = 0; i < m_Model.outputIndexes.size(); i++) + for (unsigned int i = 0; i < getMainModel(m_Model).outputIndexes.size(); i++) { const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i); storage.emplace_back(outputTensorInfo.GetNumBytes()); @@ -600,77 +555,6 @@ Return ArmnnPreparedModel_1_2::Execute(const V1_ return V1_0::ErrorStatus::NONE; } - -/// This class is strongly inspired by the default implementation in Android named DefaultBurstExecutorWithCache. 
-/// The original code is licensed under Apache-2.0 and can be found at the following link: -/// https://android.googlesource.com/platform/frameworks/ -/// ml/+/refs/tags/android-10.0.0_r20/nn/common/ExecutionBurstServer.cpp -class ArmnnBurstExecutorWithCache : public ExecutionBurstServer::IBurstExecutorWithCache { -public: - ArmnnBurstExecutorWithCache(V1_2::IPreparedModel* preparedModel) - : m_PreparedModel(preparedModel) - {} - - bool isCacheEntryPresent(int32_t slot) const override - { - const auto it = m_MemoryCache.find(slot); - return (it != m_MemoryCache.end()) && it->second.valid(); - } - - void addCacheEntry(const hidl_memory& memory, int32_t slot) override - { - m_MemoryCache[slot] = memory; - } - - void removeCacheEntry(int32_t slot) override - { - m_MemoryCache.erase(slot); - } - - std::tuple, Timing> execute( - const V1_0::Request& request, const std::vector& slots, - MeasureTiming measure) override - { - ALOGV("ArmnnPreparedModel_1_2::BurstExecutorWithCache::execute"); - hidl_vec pools(slots.size()); - - std::transform(slots.begin(), slots.end(), pools.begin(), [this](int32_t slot) - { - return m_MemoryCache[slot]; - }); - - V1_0::Request fullRequest = request; - fullRequest.pools = std::move(pools); - - // Setup Callback - V1_0::ErrorStatus returnedStatus = V1_0::ErrorStatus::GENERAL_FAILURE; - hidl_vec returnedOutputShapes; - Timing returnedTiming; - auto cb = [&returnedStatus, &returnedOutputShapes, &returnedTiming](V1_0::ErrorStatus status, - const hidl_vec& outputShapes, - const Timing& timing) - { - returnedStatus = status; - returnedOutputShapes = outputShapes; - returnedTiming = timing; - }; - - // Execute - ALOGV("ArmnnPreparedModel_1_2::BurstExecutorWithCache executing"); - const Return ret = m_PreparedModel->executeSynchronously(fullRequest, measure, cb); - - if (!ret.isOk() || returnedStatus != V1_0::ErrorStatus::NONE) - { - ALOGE("ArmnnPreparedModel_1_2::BurstExecutorWithCache::error executing"); - } - return std::make_tuple(returnedStatus, std::move(returnedOutputShapes), returnedTiming); - } - -private: - V1_2::IPreparedModel* const m_PreparedModel; - std::map m_MemoryCache; -}; - template Return ArmnnPreparedModel_1_2::configureExecutionBurst( const sp& callback, @@ -679,12 +563,10 @@ Return ArmnnPreparedModel_1_2::configureExecutionBurst( V1_2::IPreparedModel::configureExecutionBurst_cb cb) { ALOGV("ArmnnPreparedModel_1_2::configureExecutionBurst"); - const std::shared_ptr executorWithCache = - std::make_shared(this); const sp burst = ExecutionBurstServer::create(callback, requestChannel, resultChannel, - executorWithCache); + this); if (burst == nullptr) { @@ -697,9 +579,7 @@ Return ArmnnPreparedModel_1_2::configureExecutionBurst( return Void(); } - - -#ifdef ARMNN_ANDROID_NN_V1_2 +#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3) template class ArmnnPreparedModel_1_2; template bool ArmnnPreparedModel_1_2::ExecuteGraph( std::shared_ptr>& pMemPools, diff --git a/ArmnnPreparedModel_1_3.cpp b/ArmnnPreparedModel_1_3.cpp new file mode 100644 index 00000000..155f8b25 --- /dev/null +++ b/ArmnnPreparedModel_1_3.cpp @@ -0,0 +1,698 @@ +// +// Copyright © 2020 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#define LOG_TAG "ArmnnDriver" + +#include "ArmnnPreparedModel_1_3.hpp" +#include "Utils.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include + +using namespace android; +using namespace android::hardware; + +namespace { + +static const Timing g_NoTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX}; +using namespace armnn_driver; +using TimePoint = std::chrono::steady_clock::time_point; + +TimePoint Now() +{ + return std::chrono::steady_clock::now(); +} + +unsigned long MicrosecondsDuration(TimePoint endPoint, TimePoint startPoint) +{ + return static_cast(std::chrono::duration_cast( + endPoint - startPoint).count()); +} + +void NotifyCallbackAndCheck(const ::android::sp& callback, + V1_3::ErrorStatus errorStatus, + std::vector, + const Timing, + std::string callingFunction) +{ + Return returned = callback->notify(convertToV1_0(errorStatus)); + // This check is required, if the callback fails and it isn't checked it will bring down the service + if (!returned.isOk()) + { + ALOGE("ArmnnDriver::%s: hidl callback failed to return properly: %s", + callingFunction.c_str(), returned.description().c_str()); + } +} + +void NotifyCallbackAndCheck(const ::android::sp& callback, + V1_3::ErrorStatus errorStatus, + std::vector outputShapes, + const Timing timing, + std::string callingFunction) +{ + Return returned = callback->notify_1_2(convertToV1_0(errorStatus), outputShapes, timing); + // This check is required, if the callback fails and it isn't checked it will bring down the service + if (!returned.isOk()) + { + ALOGE("ArmnnDriver::%s: hidl callback failed to return properly: %s", + callingFunction.c_str(), returned.description().c_str()); + } +} + +void NotifyCallbackAndCheck(const ::android::sp& callback, + V1_3::ErrorStatus errorStatus, + std::vector outputShapes, + const Timing timing, + std::string callingFunction) +{ + Return returned = callback->notify_1_3(errorStatus, outputShapes, timing); + // This check is required, if the callback fails and it isn't checked it will bring down the service + if (!returned.isOk()) + { + ALOGE("ArmnnDriver::%s: hidl callback failed to return properly: %s", + callingFunction.c_str(), returned.description().c_str()); + } +} + +bool ValidateRequestArgument(const RequestArgument& requestArg, const armnn::TensorInfo& tensorInfo) +{ + if (requestArg.dimensions.size() != 0) + { + if (requestArg.dimensions.size() != tensorInfo.GetNumDimensions()) + { + ALOGE("Mismatched dimensions (request argument: %zu, expected: %u)", + requestArg.dimensions.size(), tensorInfo.GetNumDimensions()); + return false; + } + + for (unsigned int d = 0; d < tensorInfo.GetNumDimensions(); ++d) + { + if (requestArg.dimensions[d] != tensorInfo.GetShape()[d]) + { + ALOGE("Mismatched size for dimension %d (request argument: %u, expected %u)", + d, requestArg.dimensions[d], tensorInfo.GetShape()[d]); + return false; + } + } + } + + return true; +} + +armnn::Tensor GetTensorForRequestArgument(const RequestArgument& requestArg, + const armnn::TensorInfo& tensorInfo, + const std::vector<::android::nn::RunTimePoolInfo>& requestPools) +{ + if (!ValidateRequestArgument(requestArg, tensorInfo)) + { + return armnn::Tensor(); + } + + return armnn::Tensor(tensorInfo, GetMemoryFromPool(requestArg.location, requestPools)); +} + +inline std::string BuildTensorName(const char* tensorNamePrefix, std::size_t index) +{ + return tensorNamePrefix + std::to_string(index); +} + +} // anonymous namespace + +using namespace 
android::hardware; + +namespace armnn_driver +{ + +template +RequestThread + ArmnnPreparedModel_1_3::m_RequestThread; + +template +template +void ArmnnPreparedModel_1_3::DumpTensorsIfRequired(char const* tensorNamePrefix, + const TensorBindingCollection& tensorBindings) +{ + if (!m_RequestInputsAndOutputsDumpDir.empty()) + { + const std::string requestName = boost::str(boost::format("%1%_%2%.dump") % m_NetworkId % m_RequestCount); + for (std::size_t i = 0u; i < tensorBindings.size(); ++i) + { + DumpTensor(m_RequestInputsAndOutputsDumpDir, + requestName, + BuildTensorName(tensorNamePrefix, i), + tensorBindings[i].second); + } + } +} + +template +ArmnnPreparedModel_1_3::ArmnnPreparedModel_1_3(armnn::NetworkId networkId, + armnn::IRuntime* runtime, + const V1_3::Model& model, + const std::string& requestInputsAndOutputsDumpDir, + const bool gpuProfilingEnabled) + : m_NetworkId(networkId) + , m_Runtime(runtime) + , m_Model(model) + , m_RequestCount(0) + , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir) + , m_GpuProfilingEnabled(gpuProfilingEnabled) +{ + // Enable profiling if required. + m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled); +} + +template +ArmnnPreparedModel_1_3::~ArmnnPreparedModel_1_3() +{ + // Get a hold of the profiler used by this model. + std::shared_ptr profiler = m_Runtime->GetProfiler(m_NetworkId); + + // Unload the network associated with this model. + m_Runtime->UnloadNetwork(m_NetworkId); + + // Dump the profiling info to a file if required. + DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get()); +} + +template +Return ArmnnPreparedModel_1_3::execute(const V1_0::Request& request, + const ::android::sp& callback) +{ + if (callback.get() == nullptr) + { + ALOGE("ArmnnPreparedModel_1_3::execute invalid callback passed"); + return V1_0::ErrorStatus::INVALID_ARGUMENT; + } + + auto cb = [callback](V1_3::ErrorStatus errorStatus, + std::vector outputShapes, + const Timing& timing, + std::string callingFunction) + { + NotifyCallbackAndCheck(callback, errorStatus, outputShapes, timing, callingFunction); + }; + + + return convertToV1_0(Execute(convertToV1_3(request), MeasureTiming::NO, cb)); +} + +template +Return ArmnnPreparedModel_1_3::execute_1_2( + const V1_0::Request& request, + MeasureTiming measureTiming, + const sp& callback) +{ + if (callback.get() == nullptr) + { + ALOGE("ArmnnPreparedModel_1_3::execute_1_2 invalid callback passed"); + return V1_0::ErrorStatus::INVALID_ARGUMENT; + } + + auto cb = [callback](V1_3::ErrorStatus errorStatus, + std::vector outputShapes, + const Timing& timing, + std::string callingFunction) + { + NotifyCallbackAndCheck(callback, errorStatus, outputShapes, timing, callingFunction); + }; + + return convertToV1_0(Execute(convertToV1_3(request), measureTiming, cb)); +} + +template +Return ArmnnPreparedModel_1_3::execute_1_3( + const V1_3::Request& request, + MeasureTiming measureTiming, + const V1_3::OptionalTimePoint&, + const sp& callback) +{ + if (callback.get() == nullptr) + { + ALOGE("ArmnnPreparedModel_1_3::execute_1_3 invalid callback passed"); + return V1_3::ErrorStatus::INVALID_ARGUMENT; + } + + auto cb = [callback](V1_3::ErrorStatus errorStatus, + std::vector outputShapes, + const Timing& timing, + std::string callingFunction) + { + NotifyCallbackAndCheck(callback, errorStatus, outputShapes, timing, callingFunction); + }; + + return Execute(request, measureTiming, cb); +} + +template +Return 
ArmnnPreparedModel_1_3::executeFenced(const V1_3::Request&, + const hidl_vec&, + MeasureTiming, + const OptionalTimePoint&, + const OptionalTimeoutDuration&, + executeFenced_cb cb) +{ + cb(ErrorStatus::DEVICE_UNAVAILABLE, hidl_handle(nullptr), nullptr); + return Void(); +} + +template +Return ArmnnPreparedModel_1_3::PrepareMemoryForInputs( + armnn::InputTensors& inputs, + const V1_3::Request& request, + const std::vector& memPools) +{ + inputs.reserve(request.inputs.size()); + for (unsigned int i = 0; i < request.inputs.size(); i++) + { + const auto& inputArg = request.inputs[i]; + + const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i); + const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, memPools); + + if (inputTensor.GetMemoryArea() == nullptr) + { + ALOGE("Cannot execute request. Error converting request input %u to tensor", i); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + + inputs.emplace_back(i, inputTensor); + } + + return V1_3::ErrorStatus::NONE; +} + +template +Return ArmnnPreparedModel_1_3::PrepareMemoryForOutputs( + armnn::OutputTensors& outputs, + std::vector &outputShapes, + const V1_3::Request& request, + const std::vector& memPools) +{ + outputs.reserve(request.outputs.size()); + for (unsigned int i = 0; i < request.outputs.size(); i++) + { + const auto& outputArg = request.outputs[i]; + + const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i); + const armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, memPools); + if (outputTensor.GetMemoryArea() == nullptr) + { + ALOGE("Cannot execute request. Error converting request output %u to tensor", i); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + + const size_t outputSize = outputTensorInfo.GetNumBytes(); + const size_t bufferSize = memPools.at(outputArg.location.poolIndex).getHidlMemory().size(); + if (bufferSize < outputSize) + { + ALOGW("ArmnnPreparedModel_1_3::Execute failed"); + return V1_3::ErrorStatus::OUTPUT_INSUFFICIENT_SIZE; + } + + outputs.emplace_back(i, outputTensor); + outputShapes[i] = ComputeShape(outputTensorInfo); + } + + return V1_3::ErrorStatus::NONE; +} + +template +std::tuple, Timing, std::string> + ArmnnPreparedModel_1_3::PrepareMemoryForIO(armnn::InputTensors& inputs, + armnn::OutputTensors& outputs, + std::vector& memPools, + const V1_3::Request& request) +{ + if (!setRunTimePoolInfosFromMemoryPools(&memPools, request.pools)) + { + return {ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_3::execute"}; + } + + // add the inputs and outputs with their data + try + { + if (PrepareMemoryForInputs(inputs, request, memPools) != V1_3::ErrorStatus::NONE) + { + return {ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_3::execute"}; + } + + std::vector outputShapes(request.outputs.size()); + + auto errorStatus = PrepareMemoryForOutputs(outputs, outputShapes, request, memPools); + if (errorStatus != V1_3::ErrorStatus::NONE) + { + return {errorStatus, outputShapes, g_NoTiming, "ArmnnPreparedModel_1_3::execute"}; + } + } + catch (armnn::Exception& e) + { + ALOGW("armnn::Exception caught while preparing for EnqueueWorkload: %s", e.what()); + return {ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_3::execute"}; + } + catch (std::exception& e) + { + ALOGE("std::exception caught while preparing for EnqueueWorkload: %s", e.what()); + return {ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, 
"ArmnnPreparedModel_1_3::execute"}; + } + + return {V1_3::ErrorStatus::NONE, {}, g_NoTiming, "ArmnnPreparedModel_1_3::execute"}; +} + +template +template +Return ArmnnPreparedModel_1_3::ExecuteSynchronously(const V1_3::Request& request, + CallbackContext cbCtx) +{ + if (cbCtx.ctx.measureTimings == MeasureTiming::YES) + { + cbCtx.ctx.driverStart = Now(); + } + + if (!android::nn::validateRequest(convertToV1_3(request), m_Model)) + { + ALOGE("ArmnnPreparedModel_1_3::ExecuteSynchronously invalid request model"); + cbCtx.callback(V1_3::ErrorStatus::INVALID_ARGUMENT, + {}, + g_NoTiming, + "ArmnnPreparedModel_1_3::ExecuteSynchronously invalid request model"); + return Void(); + } + + if (!android::nn::validateRequest(request, m_Model)) + { + ALOGE("ArmnnPreparedModel_1_3::ExecuteSynchronously invalid request model"); + cbCtx.callback(V1_3::ErrorStatus::INVALID_ARGUMENT, + {}, + g_NoTiming, + "ArmnnPreparedModel_1_3::ExecuteSynchronously invalid request model"); + } + + + // map the memory pool into shared pointers + // use a shared memory pools vector on the heap, as it is passed to the request thread + auto memPools = std::make_shared>(); + + // allocate the tensors on the heap, as they are passed to the request thread + auto inputs = std::make_shared(); + auto outputs = std::make_shared(); + + auto [status, outputShapes, timing, message] = PrepareMemoryForIO(*inputs, *outputs, *memPools, request); + if (status != V1_3::ErrorStatus::NONE) + { + cbCtx.callback(status, outputShapes, timing, message); + } + + ALOGV("ArmnnPreparedModel_1_3::ExecuteSynchronously() before Execution"); + + ExecuteGraph(memPools, *inputs, *outputs, cbCtx); + return Void(); +} + +template +Return ArmnnPreparedModel_1_3::executeSynchronously(const V1_0::Request& request, + MeasureTiming measureTiming, + executeSynchronously_cb cb) +{ + ALOGV("ArmnnPreparedModel_1_3::executeSynchronously(): %s", GetModelSummary(m_Model).c_str()); + m_RequestCount++; + + if (cb == nullptr) + { + ALOGE("ArmnnPreparedModel_1_3::executeSynchronously invalid callback passed"); + return Void(); + } + + auto cbWrapper = [cb](V1_3::ErrorStatus errorStatus, + std::vector outputShapes, + const Timing& timing, + std::string) + { + cb(convertToV1_0(errorStatus), outputShapes, timing); + }; + + CallbackContext_1_3 cbCtx; + cbCtx.callback = cbWrapper; + cbCtx.ctx.measureTimings = measureTiming; + + ExecuteSynchronously(convertToV1_3(request), cbCtx); + return Void(); +} + +template +Return ArmnnPreparedModel_1_3::executeSynchronously_1_3(const V1_3::Request& request, + MeasureTiming measureTiming, + const V1_3::OptionalTimePoint& deadline, + executeSynchronously_1_3_cb cb) +{ + ALOGV("ArmnnPreparedModel_1_3::executeSynchronously_1_3(): %s", GetModelSummary(m_Model).c_str()); + m_RequestCount++; + + if (cb == nullptr) + { + ALOGE("ArmnnPreparedModel_1_3::executeSynchronously_1_3 invalid callback passed"); + return Void(); + } + + if (deadline.getDiscriminator() != OptionalTimePoint::hidl_discriminator::none) + { + ALOGE("ArmnnPreparedModel_1_3::executeSynchronously_1_3 invalid request model"); + cb(V1_3::ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming); + return Void(); + } + + auto cbWrapper = [cb](V1_3::ErrorStatus errorStatus, + std::vector outputShapes, + const Timing& timing, + std::string) + { + cb(errorStatus, outputShapes, timing); + }; + + CallbackContext_1_3 cbCtx; + cbCtx.callback = cbWrapper; + cbCtx.ctx.measureTimings = measureTiming; + + ExecuteSynchronously(request, cbCtx); + return Void(); +} + +template +Return 
+template<typename HalVersion>
+Return<void> ArmnnPreparedModel_1_3<HalVersion>::configureExecutionBurst(
+    const sp<V1_2::IBurstCallback>& callback,
+    const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
+    const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
+    V1_3::IPreparedModel::configureExecutionBurst_cb cb)
+{
+    ALOGV("ArmnnPreparedModel_1_3::configureExecutionBurst");
+    const sp<V1_2::IBurstContext> burst = ExecutionBurstServer::create(callback,
+                                                                       requestChannel,
+                                                                       resultChannel,
+                                                                       this);
+
+    if (burst == nullptr)
+    {
+        cb(V1_0::ErrorStatus::GENERAL_FAILURE, {});
+    }
+    else
+    {
+        cb(V1_0::ErrorStatus::NONE, burst);
+    }
+    return Void();
+}
+
+template<typename HalVersion>
+template<typename CallbackContext>
+bool ArmnnPreparedModel_1_3<HalVersion>::ExecuteGraph(
+    std::shared_ptr<std::vector<android::nn::RunTimePoolInfo>>& pMemPools,
+    armnn::InputTensors& inputTensors,
+    armnn::OutputTensors& outputTensors,
+    CallbackContext cb)
+{
+    ALOGV("ArmnnPreparedModel_1_3::ExecuteGraph(...)");
+
+    TimePoint driverEnd, deviceStart, deviceEnd;
+
+    DumpTensorsIfRequired("Input", inputTensors);
+
+    std::vector<V1_2::OutputShape> outputShapes(outputTensors.size());
+    for (unsigned int i = 0; i < outputTensors.size(); i++)
+    {
+        std::pair<int, armnn::Tensor> outputTensorPair = outputTensors[i];
+        const armnn::Tensor outputTensor = outputTensorPair.second;
+        const armnn::TensorInfo outputTensorInfo = outputTensor.GetInfo();
+
+        outputShapes[i] = ComputeShape(outputTensorInfo);
+    }
+
+    // run it
+    try
+    {
+        if (cb.ctx.measureTimings == MeasureTiming::YES)
+        {
+            deviceStart = Now();
+        }
+
+        armnn::Status status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
+
+        if (cb.ctx.measureTimings == MeasureTiming::YES)
+        {
+            deviceEnd = Now();
+        }
+        if (status != armnn::Status::Success)
+        {
+            ALOGW("EnqueueWorkload failed");
+            cb.callback(V1_3::ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming,
+                        "ArmnnPreparedModel_1_3::ExecuteGraph");
+            return false;
+        }
+    }
+    catch (armnn::Exception& e)
+    {
+        ALOGW("armnn::Exception caught from EnqueueWorkload: %s", e.what());
+        cb.callback(V1_3::ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_3::ExecuteGraph");
+        return false;
+    }
+    catch (std::exception& e)
+    {
+        ALOGE("std::exception caught from EnqueueWorkload: %s", e.what());
+        cb.callback(V1_3::ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_3::ExecuteGraph");
+        return false;
+    }
+
+    CommitPools(*pMemPools);
+
+    DumpTensorsIfRequired("Output", outputTensors);
+
+    if (cb.ctx.measureTimings == MeasureTiming::YES)
+    {
+        driverEnd = Now();
+        Timing timing;
+        timing.timeOnDevice = MicrosecondsDuration(deviceEnd, deviceStart);
+        timing.timeInDriver = MicrosecondsDuration(driverEnd, cb.ctx.driverStart);
+        ALOGV("ArmnnPreparedModel_1_3::execute timing - Device = %lu Driver = %lu", timing.timeOnDevice,
+              timing.timeInDriver);
+        cb.callback(V1_3::ErrorStatus::NONE, outputShapes, timing, "ArmnnPreparedModel_1_3::ExecuteGraph");
+    } else {
+        cb.callback(V1_3::ErrorStatus::NONE, outputShapes, g_NoTiming, "ArmnnPreparedModel_1_3::ExecuteGraph");
+    }
+
+    return true;
+}
+
+template<typename HalVersion>
+bool ArmnnPreparedModel_1_3<HalVersion>::ExecuteWithDummyInputs()
+{
+    std::vector<std::vector<char>> storage;
+    armnn::InputTensors inputTensors;
+    for (unsigned int i = 0; i < getMainModel(m_Model).inputIndexes.size(); i++)
+    {
+        const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
+        storage.emplace_back(inputTensorInfo.GetNumBytes());
+        const armnn::ConstTensor inputTensor(inputTensorInfo, storage.back().data());
+
+        inputTensors.emplace_back(i, inputTensor);
+    }
+
+    armnn::OutputTensors outputTensors;
+    for (unsigned int i = 0; i < getMainModel(m_Model).outputIndexes.size(); i++)
+    {
+        const armnn::TensorInfo outputTensorInfo =
m_Runtime->GetOutputTensorInfo(m_NetworkId, i); + storage.emplace_back(outputTensorInfo.GetNumBytes()); + const armnn::Tensor outputTensor(outputTensorInfo, storage.back().data()); + + outputTensors.emplace_back(i, outputTensor); + } + + auto nullCallback = [](V1_3::ErrorStatus, std::vector, const Timing&, std::string) {}; + CallbackContext_1_3 callbackContext; + callbackContext.callback = nullCallback; + callbackContext.ctx.measureTimings = MeasureTiming::NO; + auto memPools = std::make_shared>(); + return ExecuteGraph(memPools, + inputTensors, + outputTensors, + callbackContext); +} + +template +Return ArmnnPreparedModel_1_3::Execute(const V1_3::Request& request, + MeasureTiming measureTiming, + CallbackAsync_1_3 callback) +{ + ExecutionContext_1_3 ctx; + if (measureTiming == MeasureTiming::YES) + { + ctx.measureTimings = measureTiming; + ctx.driverStart = Now(); + } + + ALOGV("ArmnnPreparedModel_1_3::execute(): %s", GetModelSummary(m_Model).c_str()); + m_RequestCount++; + + if (!android::nn::validateRequest(request, m_Model)) + { + callback(V1_3::ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming, "ArmnnPreparedModel_1_3::execute"); + return V1_3::ErrorStatus::INVALID_ARGUMENT; + } + + if (!m_RequestInputsAndOutputsDumpDir.empty()) + { + ALOGD("Dumping inputs and outputs for request %" PRIuPTR, reinterpret_cast(&callback)); + } + + // map the memory pool into shared pointers + // use a shared memory pools vector on the heap, as it is passed to the request thread + auto memPools = std::make_shared>(); + + // allocate the tensors on the heap, as they are passed to the request thread + auto inputTensors = std::make_shared(); + auto outputTensors = std::make_shared(); + + auto [status, outShapes, timing, message] = PrepareMemoryForIO(*inputTensors, *outputTensors, + *memPools, request); + if (status != V1_3::ErrorStatus::NONE) + { + callback(status, outShapes, timing, message); + } + + switch(status) + { + case V1_3::ErrorStatus::OUTPUT_INSUFFICIENT_SIZE: + return V1_3::ErrorStatus::NONE; + case V1_3::ErrorStatus::GENERAL_FAILURE: + return V1_3::ErrorStatus::GENERAL_FAILURE; + default: + {} + } + + ALOGV("ArmnnPreparedModel_1_3::execute(...) before PostMsg"); + + // post the request for asynchronous execution + CallbackContext_1_3 cb; + cb.callback = callback; + cb.ctx = ctx; + m_RequestThread.PostMsg(this, memPools, inputTensors, outputTensors, cb); + ALOGV("ArmnnPreparedModel_1_3::execute(...) after PostMsg"); + return V1_3::ErrorStatus::NONE; +} + +#ifdef ARMNN_ANDROID_NN_V1_3 +template class ArmnnPreparedModel_1_3; +template bool ArmnnPreparedModel_1_3::ExecuteGraph( + std::shared_ptr>& pMemPools, + armnn::InputTensors& pInputTensors, + armnn::OutputTensors& pOutputTensors, + CallbackContext_1_3 cb); +#endif + +} // namespace armnn_driver diff --git a/ArmnnPreparedModel_1_3.hpp b/ArmnnPreparedModel_1_3.hpp new file mode 100644 index 00000000..247149c8 --- /dev/null +++ b/ArmnnPreparedModel_1_3.hpp @@ -0,0 +1,137 @@ +// +// Copyright © 2020 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ArmnnDriver.hpp" +#include "ArmnnDriverImpl.hpp" +#include "RequestThread.hpp" +#include "ModelToINetworkConverter.hpp" + +#include +#include + +#include +#include + +namespace armnn_driver +{ +using CallbackAsync_1_3 = std::function< + void(V1_3::ErrorStatus errorStatus, + std::vector<::android::hardware::neuralnetworks::V1_2::OutputShape> outputShapes, + const ::android::hardware::neuralnetworks::V1_2::Timing& timing, + std::string callingFunction)>; + +struct ExecutionContext_1_3 +{ + ::android::hardware::neuralnetworks::V1_2::MeasureTiming measureTimings = + ::android::hardware::neuralnetworks::V1_2::MeasureTiming::NO; + TimePoint driverStart; +}; + +using CallbackContext_1_3 = CallbackContext; + +using executeFenced_cb = std::function& callback)>; + +template +class ArmnnPreparedModel_1_3 : public V1_3::IPreparedModel +{ +public: + using HalModel = typename V1_3::Model; + + ArmnnPreparedModel_1_3(armnn::NetworkId networkId, + armnn::IRuntime* runtime, + const HalModel& model, + const std::string& requestInputsAndOutputsDumpDir, + const bool gpuProfilingEnabled); + + virtual ~ArmnnPreparedModel_1_3(); + + Return execute(const V1_0::Request& request, + const sp& callback) override; + + Return execute_1_2(const V1_0::Request& request, MeasureTiming measure, + const sp& callback) override; + + Return execute_1_3(const V1_3::Request& request, + V1_2::MeasureTiming measure, + const V1_3::OptionalTimePoint&, + const sp& callback) override; + + Return executeSynchronously(const V1_0::Request &request, + MeasureTiming measure, + V1_3::IPreparedModel::executeSynchronously_cb cb) override; + + Return executeSynchronously_1_3(const V1_3::Request &request, + MeasureTiming measure, + const V1_3::OptionalTimePoint& deadline, + V1_3::IPreparedModel::executeSynchronously_1_3_cb cb) override; + + Return executeFenced(const V1_3::Request& request, + const android::hardware::hidl_vec& wait_for, + MeasureTiming measure, + const V1_3::OptionalTimePoint& deadline, + const V1_3::OptionalTimeoutDuration& duration, + executeFenced_cb callback) override; + + Return configureExecutionBurst( + const sp& callback, + const android::hardware::MQDescriptorSync& requestChannel, + const android::hardware::MQDescriptorSync& resultChannel, + configureExecutionBurst_cb cb) override; + + template + Return ExecuteSynchronously(const V1_3::Request& request, CallbackContext cbCtx); + + /// execute the graph prepared from the request + template + bool ExecuteGraph(std::shared_ptr>& pMemPools, + armnn::InputTensors& inputTensors, + armnn::OutputTensors& outputTensors, + CallbackContext callback); + + /// Executes this model with dummy inputs (e.g. all zeroes). 
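+    /// Typically run once at model preparation time so that backend kernels are compiled
+    /// before the first real inference.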
+ /// \return false on failure, otherwise true + bool ExecuteWithDummyInputs(); + +private: + Return Execute(const V1_3::Request& request, + MeasureTiming measureTiming, + CallbackAsync_1_3 callback); + + Return PrepareMemoryForInputs( + armnn::InputTensors& inputs, + const V1_3::Request& request, + const std::vector& memPools); + + Return PrepareMemoryForOutputs( + armnn::OutputTensors& outputs, + std::vector &outputShapes, + const V1_3::Request& request, + const std::vector& memPools); + + std::tuple, Timing, std::string> PrepareMemoryForIO( + armnn::InputTensors& inputs, + armnn::OutputTensors& outputs, + std::vector& memPools, + const V1_3::Request& request); + + template + void DumpTensorsIfRequired(char const* tensorNamePrefix, const TensorBindingCollection& tensorBindings); + + armnn::NetworkId m_NetworkId; + armnn::IRuntime* m_Runtime; + V1_3::Model m_Model; + // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads + // It is specific to this class, so it is declared as static here + static RequestThread m_RequestThread; + uint32_t m_RequestCount; + const std::string& m_RequestInputsAndOutputsDumpDir; + const bool m_GpuProfilingEnabled; +}; + +} diff --git a/ConversionUtils.hpp b/ConversionUtils.hpp index 90b1c7de..315089c2 100644 --- a/ConversionUtils.hpp +++ b/ConversionUtils.hpp @@ -183,7 +183,7 @@ inline bool IsOperandTypeSupportedForTensors(V1_0::OperandType type) type == V1_0::OperandType::TENSOR_INT32; } -#ifdef ARMNN_ANDROID_NN_V1_2 +#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3) // Support within the 1.2 driver for specific tensor data types inline bool IsOperandTypeSupportedForTensors(V1_2::OperandType type) @@ -201,17 +201,34 @@ inline bool IsOperandTypeSupportedForTensors(V1_2::OperandType type) #endif +#ifdef ARMNN_ANDROID_NN_V1_3 + +// Support within the 1.3 driver for specific tensor data types +inline bool IsOperandTypeSupportedForTensors(V1_3::OperandType type) +{ + return type == V1_3::OperandType::BOOL || + type == V1_3::OperandType::TENSOR_FLOAT16 || + type == V1_3::OperandType::TENSOR_FLOAT32 || + type == V1_3::OperandType::TENSOR_QUANT8_ASYMM || + type == V1_3::OperandType::TENSOR_QUANT8_SYMM || + type == V1_3::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL || + type == V1_3::OperandType::TENSOR_QUANT16_SYMM || + type == V1_3::OperandType::TENSOR_INT32; +} + +#endif + inline bool IsBool(V1_0::Operand) { return false; } -inline bool Is12Operand(V1_0::Operand) +inline bool Is12OrLaterOperand(V1_0::Operand) { return false; } -#ifdef ARMNN_ANDROID_NN_V1_2 +#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3) inline bool IsBool(V1_2::Operand operand) { @@ -219,7 +236,22 @@ inline bool IsBool(V1_2::Operand operand) } /// Checks if a operand is 1_2 Operand -inline bool Is12Operand(V1_2::Operand) +inline bool Is12OrLaterOperand(V1_2::Operand) +{ + return true; +} + +#endif + +#ifdef ARMNN_ANDROID_NN_V1_3 + +inline bool IsBool(V1_3::Operand operand) +{ + return operand.type == V1_3::OperandType::BOOL; +} + +/// Checks if a operand is 1_2 Operand +inline bool Is12OrLaterOperand(V1_3::Operand) { return true; } @@ -351,7 +383,7 @@ void CalcPadding(uint32_t input, outPadTail = boost::numeric_cast(padTail); } -#ifdef ARMNN_ANDROID_NN_V1_2 +#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3) void CalcPadding(uint32_t input, uint32_t kernel, uint32_t stride, uint32_t dilation, uint32_t& outPadHead, uint32_t& outPadTail, android::nn::PaddingScheme scheme) @@ -381,7 
+413,7 @@ Shape GetOperandShape(const V1_0::Operand& operand) return shape; } -#ifdef ARMNN_ANDROID_NN_V1_2 +#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3) Shape GetOperandShape(const V1_2::Operand& operand) { @@ -395,6 +427,20 @@ Shape GetOperandShape(const V1_2::Operand& operand) #endif +#ifdef ARMNN_ANDROID_NN_V1_3 + +Shape GetOperandShape(const V1_3::Operand& operand) +{ + Shape shape; + shape.type = OperandType(operand.type); + shape.dimensions = operand.dimensions; + shape.scale = operand.scale; + shape.offset = operand.zeroPoint; + return shape; +} + +#endif + // ArmNN requires the bias scale to be equal to the product of the weight and input scales, which is also // what AndroidNN requires. However for some of the AndroidNN tests the values don't exactly match so // we accept some tolerance. We don't want ArmNN itself to accept these inconsistencies as it is up to the @@ -636,8 +682,9 @@ const HalOperand* GetInputOperand(const HalOperation& operation, return nullptr; } - BOOST_ASSERT(operation.inputs[inputIndex] < model.operands.size()); // Model should have been validated beforehand - return &model.operands[operation.inputs[inputIndex]]; + // Model should have been validated beforehand + BOOST_ASSERT(operation.inputs[inputIndex] < getMainModel(model).operands.size()); + return &getMainModel(model).operands[operation.inputs[inputIndex]]; } template +LayerInputHandle ConvertToLayerInputHandle(const ::android::hardware::neuralnetworks::V1_3::Operation& operation, + uint32_t inputIndex, + const::android::hardware::neuralnetworks::V1_3::Model& model, + ConversionData& data) +{ + using HalOperand = typename HalPolicy::Operand; + using HalOperandType = typename HalPolicy::OperandType; + using HalOperandLifeTime = typename HalPolicy::OperandLifeTime; + + const HalOperand* operand = GetInputOperand(operation, inputIndex, model); + if (!operand) + { + Fail("%s: failed to get input operand %i", __func__, inputIndex); + return LayerInputHandle(); + } + + if (!IsOperandTypeSupportedForTensors(operand->type)) + { + Fail("%s: unsupported operand type for tensor %s", __func__, toString(operand->type).c_str()); + return LayerInputHandle(); + } + + try + { + armnn::TensorInfo operandTensorInfo = GetTensorInfoForOperand(*operand); + if (IsDynamicTensor(operandTensorInfo)) + { + Fail("%s: dynamic input tensors are not supported", __func__); + return LayerInputHandle(); + } + + switch (operand->lifetime) + { + case HalOperandLifeTime::SUBGRAPH_INPUT: + { + // NOTE: We must check whether we can support the input tensor on at least one + // of the provided backends; otherwise we cannot convert the operation + bool isInputSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsInputSupported, + data.m_Backends, + isInputSupported, + operandTensorInfo); + + if (!isInputSupported) + { + Fail("%s: unsupported input tensor", __func__); + return LayerInputHandle(); + } + + BOOST_FALLTHROUGH; // intentional fallthrough + } + case HalOperandLifeTime::TEMPORARY_VARIABLE: // intentional fallthrough + case HalOperandLifeTime::SUBGRAPH_OUTPUT: + { + // The tensor is either an operand internal to the model, or a model input. + // It can be associated with an ArmNN output slot for an existing layer. + + // m_OutputSlotForOperand[...] 
can be nullptr if the previous layer could not be converted + const uint32_t operandIndex = operation.inputs[inputIndex]; + return LayerInputHandle(true, data.m_OutputSlotForOperand[operandIndex], operandTensorInfo); + } + case HalOperandLifeTime::CONSTANT_COPY: // intentional fallthrough + case HalOperandLifeTime::CONSTANT_REFERENCE: + { + // The tensor has an already known constant value, and can be converted into an ArmNN Constant layer. + ConstTensorPin tensorPin = ConvertOperandToConstTensorPin(*operand, model, data); + if (tensorPin.IsValid()) + { + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsConstantSupported, + data.m_Backends, + isSupported, + tensorPin.GetConstTensor().GetInfo()); + if (!isSupported) + { + return LayerInputHandle(); + } + + armnn::IConnectableLayer* constantLayer = + data.m_Network->AddConstantLayer(tensorPin.GetConstTensor()); + armnn::IOutputSlot& outputSlot = constantLayer->GetOutputSlot(0); + outputSlot.SetTensorInfo(tensorPin.GetConstTensor().GetInfo()); + + return LayerInputHandle(true, &outputSlot, operandTensorInfo); + } + else + { + Fail("%s: invalid operand tensor", __func__); + return LayerInputHandle(); + } + break; + } + default: + { + // Unsupported lifetime for an input tensor + Fail("%s: unsupported lifetime for input tensor: %s", + __func__, toString(operand->lifetime).c_str()); + return LayerInputHandle(); + } + } + } + catch (UnsupportedOperand& e) + { + Fail("%s: Operand type %s not supported in ArmnnDriver", __func__, toString(e.m_type).c_str()); + return LayerInputHandle(); + } +} +#endif + template @@ -1448,7 +1609,7 @@ bool ConvertPooling2d(const HalOperation& operation, return Fail("%s: Operation has invalid inputs", operationName); } - if (Is12Operand(*output)) + if (Is12OrLaterOperand(*output)) { desc.m_DataLayout = OptionalDataLayout(operation, 10, model, data); } @@ -1467,7 +1628,7 @@ bool ConvertPooling2d(const HalOperation& operation, return Fail("%s: Operation has invalid inputs", operationName); } - if (Is12Operand(*output)) + if (Is12OrLaterOperand(*output)) { desc.m_DataLayout = OptionalDataLayout(operation, 7, model, data); } @@ -2106,7 +2267,7 @@ bool ConvertDepthToSpace(const HalOperation& operation, const HalModel& model, C } descriptor.m_DataLayout = armnn::DataLayout::NHWC; - if (Is12Operand(*output)) + if (Is12OrLaterOperand(*output)) { descriptor.m_DataLayout = OptionalDataLayout(operation, 2, model, data); } @@ -2440,7 +2601,7 @@ inline bool IsQSymm8(const V1_0::Operand&) return false; } -#ifdef ARMNN_ANDROID_NN_V1_2 +#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3) inline bool IsQSymm8(const V1_2::Operand& operand) { @@ -2449,6 +2610,15 @@ inline bool IsQSymm8(const V1_2::Operand& operand) #endif +#ifdef ARMNN_ANDROID_NN_V1_3 + +inline bool IsQSymm8(const V1_3::Operand& operand) +{ + return operand.type == V1_3::OperandType::TENSOR_QUANT8_SYMM; +} + +#endif + enum class DequantizeStatus { SUCCESS, @@ -2484,10 +2654,10 @@ DequantizeResult DequantizeIfRequired(size_t operand_index, // The weights are a non const tensor, this indicates they might be the output of a dequantize op. 
// Iterate over the nodes and find the previous operation which should be DEQUANTIZE - for (uint32_t operationIdx = 0; operationIdx < model.operations.size(); ++operationIdx) + for (uint32_t operationIdx = 0; operationIdx < getMainModel(model).operations.size(); ++operationIdx) { // Search for the DEQUANTIZE op which has the operand with index equal to operandIndex - const auto& operationIt = model.operations[operationIdx]; + const auto& operationIt = getMainModel(model).operations[operationIdx]; if (operationIt.type != HalPolicy::OperationType::DEQUANTIZE) { continue; @@ -3525,7 +3695,7 @@ bool ConvertBatchToSpaceNd(const HalOperation& operation, batchToSpaceNdDesc.m_BlockShape.assign(block.cbegin(), block.cend()); batchToSpaceNdDesc.m_DataLayout = armnn::DataLayout::NHWC; - if (Is12Operand(*output)) + if (Is12OrLaterOperand(*output)) { batchToSpaceNdDesc.m_DataLayout = OptionalDataLayout(operation, 2, model, data); } @@ -3633,7 +3803,7 @@ bool ConvertSpaceToBatchNd(const HalOperation& operation, const HalModel& model, descriptor.m_BlockShape.assign(blockShape.cbegin(), blockShape.cend()); descriptor.m_PadList.assign(paddingList.cbegin(), paddingList.cend()); - if (Is12Operand(*output)) + if (Is12OrLaterOperand(*output)) { descriptor.m_DataLayout = OptionalDataLayout(operation, 3, model, data); } diff --git a/ConversionUtils_1_2.hpp b/ConversionUtils_1_2.hpp new file mode 100644 index 00000000..460c88bd --- /dev/null +++ b/ConversionUtils_1_2.hpp @@ -0,0 +1,2590 @@ +// +// Copyright © 2020 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "Utils.hpp" + +#include "ConversionUtils.hpp" +#include + +#include + +using Half = half_float::half; + +namespace armnn_driver +{ + +using namespace armnn; +using namespace android::nn; + +template +bool IsQSymmDequantizeForWeights(const HalOperation& operation, const HalModel& model) +{ + using HalOperand = typename HalPolicy::Operand; + using HalOperationType = typename HalPolicy::OperationType; + + const HalOperand* operand = GetInputOperand(operation, 0, model); + if (!operand) + { + return false; + } + + if(!IsQSymm8(*operand)) + { + // Only QSymm8 weights are dequantized on the fly by the driver + return false; + } + + if (!IsOperandConstant(*operand)) + { + // Non-const input is not accepted for weights + return false; + } + + // Iterate through all the operations and find the operation feeding from the Dequantize output + const size_t outputIndex = operation.outputs[0]; + for (uint32_t operationIdx = 0; operationIdx < getMainModel(model).operations.size(); ++operationIdx) + { + const auto& operationIt = getMainModel(model).operations[operationIdx]; + switch (operationIt.type) + { + case HalOperationType::FULLY_CONNECTED: + if (outputIndex == operationIt.inputs[1]) // Weights are bound to slot 1 + { + // If the output is going into the FC weights return true + return true; + } + break; + case HalOperationType::LSTM: + for (size_t k = 0; k < operationIt.inputs.size(); ++k) + { + if (outputIndex == operationIt.inputs[k]) + { + // If the output is going into the LSTM weights return true + return true; + } + } + break; + default: + break; + } + } + + return false; +} + +template +bool SetupAndTrackLayerOutputSlotAndOverrideTensorInfo(const HalOperation& operation, + uint32_t operationOutputIndex, + armnn::IConnectableLayer& layer, + uint32_t layerOutputIndex, + const HalModel& model, + ConversionData& data, + const armnn::TensorInfo tensor_info) +{ + using HalOperand = typename HalPolicy::Operand; + + 
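+    // Look up the output operand, record which ArmNN output slot will produce it, and override that
+    // slot's tensor info with the one supplied by the caller.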
const HalOperand* outputOperand = GetOutputOperand(operation, operationOutputIndex, model); + if ((outputOperand == nullptr) || (operationOutputIndex >= layer.GetNumOutputSlots())) + { + return false; + } + + armnn::IOutputSlot& outputSlot = layer.GetOutputSlot(layerOutputIndex); + + const uint32_t operandIndex = operation.outputs[operationOutputIndex]; + data.m_OutputSlotForOperand[operandIndex] = &outputSlot; + + outputSlot.SetTensorInfo(tensor_info); + + return true; +} + +template +bool ConvertComparison_1_2(const HalOperation& operation, + const HalModel& model, + ConversionData& data, + ComparisonOperation comparisonOperation) +{ + using HalOperand = typename HalPolicy::Operand; + + ALOGV("HalPolicy::ConvertComparison()"); + ALOGV("comparisonOperation = %s", GetComparisonOperationAsCString(comparisonOperation)); + + LayerInputHandle input0 = ConvertToLayerInputHandle(operation, 0, model, data); + LayerInputHandle input1 = ConvertToLayerInputHandle(operation, 1, model, data); + + if (!(input0.IsValid() && input1.IsValid())) + { + return Fail("%s: Operation has invalid inputs", __func__); + } + + const HalOperand* output = GetOutputOperand(operation, 0, model); + if (!output) + { + return Fail("%s: Could not read output 0", __func__); + } + + const TensorInfo& inputInfo0 = input0.GetTensorInfo(); + const TensorInfo& inputInfo1 = input1.GetTensorInfo(); + const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); + + if (IsDynamicTensor(outputInfo)) + { + return Fail("%s: Dynamic output tensors are not supported", __func__); + } + + ComparisonDescriptor descriptor(comparisonOperation); + + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsComparisonSupported, + data.m_Backends, + isSupported, + inputInfo0, + inputInfo1, + outputInfo, + descriptor); + + if (!isSupported) + { + return false; + } + + IConnectableLayer* layer = data.m_Network->AddComparisonLayer(descriptor); + assert(layer != nullptr); + + bool isReshapeSupported = BroadcastTensor(input0, input1, layer, data); + if (!isReshapeSupported) + { + return false; + } + + return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); +} + +template +bool ConvertConv2d_1_2(const HalOperation& operation, const HalModel& model, ConversionData& data) +{ + + using HalOperand = typename HalPolicy::Operand; + using HalOperandType = typename HalPolicy::OperandType; + + ALOGV("HalPolicy::ConvertConv2d_1_2()"); + + LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); + if (!input.IsValid()) + { + return Fail("%s: Operation has invalid inputs", __func__); + } + + const HalOperand* output = GetOutputOperand(operation, 0, model); + if (!output) + { + return Fail("%s: Could not read output 0", __func__); + } + + const TensorInfo& inputInfo = input.GetTensorInfo(); + const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); + + if (IsDynamicTensor(outputInfo)) + { + return Fail("%s: Dynamic output tensors are not supported", __func__); + } + + Convolution2dDescriptor desc; + desc.m_DataLayout = DataLayout::NHWC; + + // Determine whether padding is implicit or explicit + bool implicitPadding = operation.inputs.size() == 7 || + (operation.inputs.size() >= 8 && + GetInputOperand(operation, 7, model)->type == HalOperandType::BOOL); + + if (implicitPadding) + { + desc.m_DataLayout = OptionalDataLayout(operation, 7, model, data); + } + else if (operation.inputs.size() >= 10) + { + desc.m_DataLayout = OptionalDataLayout(operation, 10, model, data); + } + + const PermutationVector 
OHWIToOIHW = {0, 2, 3, 1}; + + // ArmNN does not currently support non-fixed weights or bias + // The NNAPI filter is always OHWI [depth_out, filter_height, filter_width, depth_in] but ArmNN expects the + // filter's height and width indices to match the input's height and width indices so we permute it to OIHW if + // the DataLayout is NCHW + const ConstTensorPin weightsPin = (desc.m_DataLayout == DataLayout::NCHW) ? + ConvertOperationInputToConstTensorPin(operation, 1, + model, data, OHWIToOIHW) : + ConvertOperationInputToConstTensorPin(operation, 1, model, data); + const ConstTensorPin biasPin = + ConvertOperationInputToConstTensorPin(operation, 2, model, data); + + if (!weightsPin.IsValid()) + { + return Fail("%s: Operation has invalid weights", __func__); + } + + if (!biasPin.IsValid()) + { + return Fail("%s: Operation has invalid biases", __func__); + } + + ConstTensor weights = weightsPin.GetConstTensor(); + ConstTensor bias = biasPin.GetConstTensor(); + SanitizeBiasQuantizationScale(bias.GetInfo(), weights.GetInfo(), inputInfo); + + ActivationFn activation; + + if (implicitPadding) + { + android::nn::PaddingScheme paddingScheme; + if (!GetInputPaddingScheme(operation, 3, paddingScheme, model, data) || + !GetInputScalar(operation, 4, HalOperandType::INT32, desc.m_StrideX, model, data) || + !GetInputScalar(operation, 5, HalOperandType::INT32, desc.m_StrideY, model, data) || + !GetInputActivationFunction(operation, 6, activation, model, data) || + !GetOptionalConvolutionDilationParams(operation, 8, desc, model, data)) + { + return Fail("%s: Operation has invalid inputs (implicit padding)", __func__); + } + + armnnUtils::DataLayoutIndexed dataLayoutIndexed(desc.m_DataLayout); + unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex(); + unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex(); + const uint32_t kernelX = weights.GetShape()[widthIndex]; + const uint32_t kernelY = weights.GetShape()[heightIndex]; + const uint32_t inputX = inputInfo.GetShape()[widthIndex]; + const uint32_t inputY = inputInfo.GetShape()[heightIndex]; + + CalcPadding(inputX, kernelX, desc.m_StrideX, desc.m_DilationX, desc.m_PadLeft, desc.m_PadRight, paddingScheme); + CalcPadding(inputY, kernelY, desc.m_StrideY, desc.m_DilationY, desc.m_PadTop, desc.m_PadBottom, paddingScheme); + + } + else if (operation.inputs.size() >= 10) + { + // explicit padding + if (!GetInputScalar(operation, 3, HalOperandType::INT32, desc.m_PadLeft, model, data) || + !GetInputScalar(operation, 4, HalOperandType::INT32, desc.m_PadRight, model, data) || + !GetInputScalar(operation, 5, HalOperandType::INT32, desc.m_PadTop, model, data) || + !GetInputScalar(operation, 6, HalOperandType::INT32, desc.m_PadBottom, model, data) || + !GetInputScalar(operation, 7, HalOperandType::INT32, desc.m_StrideX, model, data) || + !GetInputScalar(operation, 8, HalOperandType::INT32, desc.m_StrideY, model, data) || + !GetInputActivationFunction(operation, 9, activation, model, data) || + !GetOptionalConvolutionDilationParams(operation, 11, desc, model, data)) + { + return Fail("%s: Operation has invalid inputs (explicit padding)", __func__); + } + } + else + { + return Fail("%s: Unsupported number of operation inputs", __func__); + } + + desc.m_BiasEnabled = true; + Optional biases(bias.GetInfo()); + + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsConvolution2dSupported, + data.m_Backends, + isSupported, + inputInfo, + outputInfo, + desc, + weights.GetInfo(), + biases); + + if (!isSupported) + { + return false; + } + + 
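+    // Add the convolution layer with its constant weights and bias, then append the fused activation (if any).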
IConnectableLayer* startLayer = + data.m_Network->AddConvolution2dLayer(desc, weights, Optional(bias)); + + if (!startLayer) + { + return Fail("%s: AddConvolution2dLayer failed", __func__); + } + + IConnectableLayer* endLayer = ProcessActivation(outputInfo, activation, startLayer, data); + + if (!endLayer) + { + return Fail("%s: ProcessActivation failed", __func__); + } + + input.Connect(startLayer->GetInputSlot(0)); + + return SetupAndTrackLayerOutputSlot(operation, 0, *endLayer, model, data); +} + +template +bool ConvertDepthwiseConv2d_1_2(const HalOperation& operation, const HalModel& model, ConversionData& data) +{ + using HalOperand = typename HalPolicy::Operand; + using HalOperandType = typename HalPolicy::OperandType; + + ALOGV("HalPolicy::ConvertDepthwiseConv2d_1_2()"); + + LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); + + if (!input.IsValid()) + { + return Fail("%s: Operation has invalid inputs", __func__); + } + + const HalOperand* output = GetOutputOperand(operation, 0, model); + + if (!output) + { + return Fail("%s: Could not read output 0", __func__); + } + + const TensorInfo& inputInfo = input.GetTensorInfo(); + const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); + + if (IsDynamicTensor(outputInfo)) + { + return Fail("%s: Dynamic output tensors are not supported", __func__); + } + + // ArmNN does not currently support non-fixed weights or bias + // Find the shape of the weights tensor. In AndroidNN this will be [ 1, H, W, I * M ] + const HalOperand* weightsOperand = GetInputOperand(operation, 1, model); + + if (weightsOperand == nullptr) + { + return Fail("%s: Operand is invalid", __func__); + } + if ( weightsOperand->dimensions[0] != 1) + { + return Fail("%s: Invalid weights; for depthwise convolution, dimension 0 must be 1 but it is %i", + __func__, weightsOperand->dimensions[0] ); + } + + DepthwiseConvolution2dDescriptor desc; + desc.m_DataLayout = DataLayout::NHWC; + + // Determine whether padding is implicit or explicit + bool implicitPadding = operation.inputs.size() == 8 || + (operation.inputs.size() >= 9 && + GetInputOperand(operation, 8, model)->type == HalOperandType::BOOL); + + // Look ahead to find the optional DataLayout, if present + const uint32_t dataLayoutFlagIndex = implicitPadding ? 
8 : 11; + desc.m_DataLayout = OptionalDataLayout(operation, dataLayoutFlagIndex, model, data); + + armnnUtils::DataLayoutIndexed dataLayoutIndexed(desc.m_DataLayout); + unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex(); + unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex(); + unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex(); + + // Reinterpret weight data as [ H, W, I, M ] + TensorShape weightsShape({ weightsOperand->dimensions[1], + weightsOperand->dimensions[2], + inputInfo.GetShape()[channelsIndex], + weightsOperand->dimensions[3] / inputInfo.GetShape()[channelsIndex] }); + + // Swizzle weight data [ H, W, I, M ] -> [ M, I, H, W ] + const PermutationVector HWIMToMIHW = { 2U, 3U, 1U, 0U }; + + const ConstTensorPin weightsPin = + ConvertOperationInputToConstTensorPin(operation, + 1, + model, + data, + HWIMToMIHW, + &weightsShape); + + // Bias is a 1D tensor + const ConstTensorPin biasPin = + ConvertOperationInputToConstTensorPin(operation, 2, model, data); + + if (!weightsPin.IsValid()) + { + return Fail("%s: Operation has invalid weights", __func__); + } + + if (!biasPin.IsValid()) + { + return Fail("%s: Operation has invalid biases", __func__); + } + + ConstTensor weights = weightsPin.GetConstTensor(); + ConstTensor bias = biasPin.GetConstTensor(); + SanitizeBiasQuantizationScale(bias.GetInfo(), weights.GetInfo(), inputInfo); + + ActivationFn activation; + + if (implicitPadding) + { + android::nn::PaddingScheme paddingScheme; + if (!GetInputPaddingScheme(operation, 3, paddingScheme, model, data) || + !GetInputScalar(operation, 4, HalOperandType::INT32, desc.m_StrideX, model, data) || + !GetInputScalar(operation, 5, HalOperandType::INT32, desc.m_StrideY, model, data) || + !GetInputActivationFunction(operation, 7, activation, model, data) || + !GetOptionalConvolutionDilationParams(operation, 9, desc, model, data)) + { + return Fail("%s: Operation has invalid inputs (implicit padding)", __func__); + } + + const uint32_t kernelX = weights.GetShape()[3]; + const uint32_t kernelY = weights.GetShape()[2]; + const uint32_t inputX = inputInfo.GetShape()[widthIndex]; + const uint32_t inputY = inputInfo.GetShape()[heightIndex]; + + CalcPadding(inputX, kernelX, desc.m_StrideX, desc.m_DilationX, desc.m_PadLeft, desc.m_PadRight, paddingScheme); + CalcPadding(inputY, kernelY, desc.m_StrideY, desc.m_DilationY, desc.m_PadTop, desc.m_PadBottom, paddingScheme); + } + else if (operation.inputs.size() >= 11) + { + // explicit padding + if (!GetInputScalar(operation, 3, HalOperandType::INT32, desc.m_PadLeft, model, data) || + !GetInputScalar(operation, 4, HalOperandType::INT32, desc.m_PadRight, model, data) || + !GetInputScalar(operation, 5, HalOperandType::INT32, desc.m_PadTop, model, data) || + !GetInputScalar(operation, 6, HalOperandType::INT32, desc.m_PadBottom, model, data) || + !GetInputScalar(operation, 7, HalOperandType::INT32, desc.m_StrideX, model, data) || + !GetInputScalar(operation, 8, HalOperandType::INT32, desc.m_StrideY, model, data) || + !GetInputActivationFunction(operation, 10, activation, model, data) || + !GetOptionalConvolutionDilationParams(operation, 12, desc, model, data)) + { + return Fail("%s: Operation has invalid inputs (explicit padding)", __func__); + } + } + else + { + return Fail("%s: Unsupported number of operation inputs", __func__); + } + + desc.m_BiasEnabled = true; + Optional biases(bias.GetInfo()); + + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsDepthwiseConvolutionSupported, + data.m_Backends, + isSupported, 
+ inputInfo, + outputInfo, + desc, + weights.GetInfo(), + biases); + + if (!isSupported) + { + return false; + } + + IConnectableLayer* startLayer = + data.m_Network->AddDepthwiseConvolution2dLayer(desc, weights, Optional(bias)); + + if (!startLayer) + { + return Fail("%s: AddDepthwiseConvolution2dLayer failed", __func__); + } + + IConnectableLayer* endLayer = ProcessActivation(outputInfo, activation, startLayer, data); + if (!endLayer) + { + return Fail("%s: ProcessActivation failed", __func__); + } + + input.Connect(startLayer->GetInputSlot(0)); + + return SetupAndTrackLayerOutputSlot(operation, 0, *endLayer, model, data); +} + +template +bool ConvertDequantize_1_2(const HalOperation& operation, const HalModel& model, ConversionData& data) +{ + ALOGV("HalPolicy::ConvertDequantize()"); + + if (IsQSymmDequantizeForWeights(operation, model)) + { + // NOTE: QSymm8 weights are dequantized internally by the driver, + // therefore this type of Dequantize is implicitly supported + return true; + } + + return ::ConvertDequantize(operation, model, data); +} + +template +bool ConvertElementwiseUnary(const HalOperation& operation, + const HalModel& model, + ConversionData& data, + UnaryOperation unaryOperation) +{ + using HalOperand = typename HalPolicy::Operand; + + ALOGV("HalPolicy::ConvertElementwiseUnary()"); + ALOGV("unaryOperation = %s", GetUnaryOperationAsCString(unaryOperation)); + + LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); + + if (!input.IsValid()) + { + return Fail("%s: Operation has invalid input", __func__); + } + + const HalOperand* output = GetOutputOperand(operation, 0, model); + if (!output) + { + return Fail("%s: Could not read output 0", __func__); + } + + const TensorInfo& inputInfo = input.GetTensorInfo(); + const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); + + if (IsDynamicTensor(outputInfo)) + { + return Fail("%s: Dynamic output tensors are not supported", __func__); + } + + ElementwiseUnaryDescriptor descriptor(unaryOperation); + + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsElementwiseUnarySupported, + data.m_Backends, + isSupported, + inputInfo, + outputInfo, + descriptor); + + if (!isSupported) + { + return false; + } + + IConnectableLayer* layer = data.m_Network->AddElementwiseUnaryLayer(descriptor); + assert(layer != nullptr); + + input.Connect(layer->GetInputSlot(0)); + + return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); +} + +template +bool ConvertExpandDims(const HalOperation& operation, const HalModel& model, ConversionData& data) +{ + using HalOperand = typename HalPolicy::Operand; + using HalOperandType = typename HalPolicy::OperandType; + + ALOGV("HalPolicy::ConvertExpandDims()"); + + LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); + + if (!input.IsValid()) + { + return Fail("%s: Operation has invalid input", __func__); + } + + const HalOperand* output = GetOutputOperand(operation, 0, model); + if (!output) + { + return Fail("%s: Operation has invalid output", __func__); + } + + const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); + if (IsDynamicTensor(outputInfo)) + { + return Fail("%s: Dynamic output tensors are not supported", __func__); + } + + int32_t axis; + if (!GetInputScalar(operation, 1, HalOperandType::INT32, axis, model, data)) + { + return Fail("%s: failed to get axis input value", __func__); + } + + TensorShape targetShape; + + try + { + targetShape = armnnUtils::ExpandDims(input.GetTensorInfo().GetShape(), 
axis); + } + catch (const std::exception& e) + { + return Fail("%s: %s", __func__, e.what()); + } + + if (targetShape != outputInfo.GetShape()) + { + return Fail("%s: Shape of the output operand does not match the resolved expanded shape", __func__); + } + + ReshapeDescriptor reshapeDescriptor; + reshapeDescriptor.m_TargetShape = targetShape; + + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsReshapeSupported, + data.m_Backends, + isSupported, + input.GetTensorInfo(), + outputInfo, + reshapeDescriptor); + + if (!isSupported) + { + return false; + } + + IConnectableLayer* layer = data.m_Network->AddReshapeLayer(reshapeDescriptor); + assert(layer != nullptr); + input.Connect(layer->GetInputSlot(0)); + + return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); +} + +template +bool ConvertGroupedConv2d(const HalOperation& operation, const HalModel& model, ConversionData& data) +{ + using HalOperand = typename HalPolicy::Operand; + using HalOperandType = typename HalPolicy::OperandType; + + ALOGV("HalPolicy::ConvertGroupedConv2d()"); + + // + // Parse data + // + LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); + if (!input.IsValid()) + { + return Fail("%s: Operation has invalid inputs", __func__); + } + const TensorInfo& inputInfo = input.GetTensorInfo(); + + const HalOperand* output = GetOutputOperand(operation, 0, model); + if (!output) + { + return Fail("%s: Could not read output 0", __func__); + } + const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); + if (IsDynamicTensor(outputInfo)) + { + return Fail("%s: Dynamic output tensors are not supported", __func__); + } + + // Look ahead to determine data layout + DataLayout dataLayout = DataLayout::NHWC; + if (operation.inputs.size() == 12) + { + dataLayout = OptionalDataLayout(operation, 11, model, data); + } + else + { + dataLayout = OptionalDataLayout(operation, 8, model, data); + } + + // NOTE: + // NNAPI weights are always OHWI, i.e. [depth_out, filter_height, filter_width, depth_group], + // but Arm NN expects the filter's height and width indices to match the input's height and + // width indices so when the DataLayout is NCHW, we need to permute the weights to OIHW + const PermutationVector ohwiToOihw = { 0u, 2u, 3u, 1u }; + const ConstTensorPin weightsPin = (dataLayout == DataLayout::NCHW) ? 
+ ConvertOperationInputToConstTensorPin(operation, 1, + model, data, ohwiToOihw) : + ConvertOperationInputToConstTensorPin(operation, 1, model, data); + const ConstTensorPin biasesPin = + ConvertOperationInputToConstTensorPin(operation, 2, model, data); + if (!weightsPin.IsValid() || !biasesPin.IsValid()) + { + return Fail("%s: Operation has invalid inputs", __func__); + } + + ConstTensor weights = weightsPin.GetConstTensor(); + ConstTensor biases = biasesPin.GetConstTensor(); + SanitizeBiasQuantizationScale(biases.GetInfo(), weights.GetInfo(), inputInfo); + + const TensorShape& inputShape = inputInfo.GetShape(); + const TensorShape& outputShape = outputInfo.GetShape(); + const TensorShape& weightsShape = weights.GetShape(); + const TensorShape& biasesShape = biases.GetShape(); + + armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout); + const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex(); + const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex(); + const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex(); + + Convolution2dDescriptor desc; + desc.m_DataLayout = dataLayout; + desc.m_BiasEnabled = true; + + int numGroups; + ActivationFn activation; + + if (operation.inputs.size() == 12) + { + if (!GetInputScalar(operation, 3, HalOperandType::INT32, desc.m_PadLeft, model, data) || + !GetInputScalar(operation, 4, HalOperandType::INT32, desc.m_PadRight, model, data) || + !GetInputScalar(operation, 5, HalOperandType::INT32, desc.m_PadTop, model, data) || + !GetInputScalar(operation, 6, HalOperandType::INT32, desc.m_PadBottom, model, data) || + !GetInputScalar(operation, 7, HalOperandType::INT32, desc.m_StrideX, model, data) || + !GetInputScalar(operation, 8, HalOperandType::INT32, desc.m_StrideY, model, data) || + !GetInputScalar(operation, 9, HalOperandType::INT32, numGroups, model, data) || + !GetInputActivationFunction(operation, 10, activation, model, data)) + { + return Fail("%s: Operation has invalid inputs (explicit padding)", __func__); + } + + } + else if (operation.inputs.size() == 9) + { + android::nn::PaddingScheme paddingScheme; + if (!GetInputPaddingScheme(operation, 3, paddingScheme, model, data) || + !GetInputScalar(operation, 4, HalOperandType::INT32, desc.m_StrideX, model, data) || + !GetInputScalar(operation, 5, HalOperandType::INT32, desc.m_StrideY, model, data) || + !GetInputScalar(operation, 6, HalOperandType::INT32, numGroups, model, data) || + !GetInputActivationFunction(operation, 7, activation, model, data)) + { + return Fail("%s: Operation has invalid inputs (implicit padding)", __func__); + } + + const uint32_t inputX = inputInfo.GetShape()[widthIndex]; + const uint32_t inputY = inputInfo.GetShape()[heightIndex]; + + const uint32_t kernelX = weightsShape[widthIndex]; + const uint32_t kernelY = weightsShape[heightIndex]; + + CalcPadding(inputX, kernelX, desc.m_StrideX, desc.m_PadLeft, desc.m_PadRight, paddingScheme); + CalcPadding(inputY, kernelY, desc.m_StrideY, desc.m_PadTop, desc.m_PadBottom, paddingScheme); + } + else + { + return Fail("%s: Unsupported number of operation inputs", __func__); + } + + const unsigned int outputChannels = outputShape[channelsIndex]; + + const unsigned int channelsPerGroup = weightsShape[channelsIndex]; + const unsigned int channelMultiplier = outputChannels / numGroups; + + // + // Validate all relevant inputs + // + if (numGroups <= 0) + { + return Fail("%s: Number of groups must be greater than 0. 
Got: %d", __func__, numGroups); + } + + if (outputChannels % numGroups != 0u) + { + return Fail("%s: Output channels must be divisible by the number of groups", __func__); + } + + // + // Set up Splitter layer + // + unsigned int splitterDimSizes[4] = { inputShape[0], inputShape[1], inputShape[2], inputShape[3] }; + splitterDimSizes[channelsIndex] /= numGroups; // split in depth + + TensorInfo splitterOutputInfo(4, + splitterDimSizes, + inputInfo.GetDataType(), + inputInfo.GetQuantizationScale(), + inputInfo.GetQuantizationOffset()); + + std::vector> splitterOutputInfos(numGroups, std::ref(splitterOutputInfo)); + + ViewsDescriptor splitterDesc(numGroups); + for (unsigned int group = 0u; group < numGroups; ++group) + { + splitterDesc.SetViewOriginCoord(group, channelsIndex, splitterDimSizes[channelsIndex] * group); + for (unsigned int dimIdx = 0u; dimIdx < 4u; dimIdx++) + { + splitterDesc.SetViewSize(group, dimIdx, splitterDimSizes[dimIdx]); + } + } + + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsSplitterSupported, + data.m_Backends, + isSupported, + inputInfo, + splitterOutputInfos, + splitterDesc); + if (!isSupported) + { + return false; + } + + IConnectableLayer* splitterLayer = data.m_Network->AddSplitterLayer(splitterDesc); + if (!splitterLayer) + { + return Fail("%s: Failed to add SplitterLayer", __func__); + } + + input.Connect(splitterLayer->GetInputSlot(0)); + for (unsigned int group = 0u; group < splitterLayer->GetNumOutputSlots(); ++group) + { + splitterLayer->GetOutputSlot(group).SetTensorInfo(splitterOutputInfo); + } + + // + // Set up Convolution2d layers for each group + // + + // Set up group tensor shapes + TensorShape groupInputShape(inputShape); + groupInputShape[channelsIndex] = channelsPerGroup; + + TensorShape groupOutputShape(outputShape); + groupOutputShape[channelsIndex] = 1; + + TensorShape groupWeightsShape(weightsShape); + groupWeightsShape[0] /= channelMultiplier * numGroups; + + TensorShape groupBiasesShape({ 1 }); + + // Set up group tensor infos + TensorInfo groupInputInfo(inputInfo); + groupInputInfo.SetShape(groupInputShape); + + const TensorInfo& weightsInfo = weights.GetInfo(); + TensorInfo groupWeightsInfo(weightsInfo); + groupWeightsInfo.SetShape(groupWeightsShape); + + const TensorInfo& biasesInfo = biases.GetInfo(); + TensorInfo groupBiasesInfo(biasesInfo); + groupBiasesInfo.SetShape(groupBiasesShape); + + TensorInfo groupOutputInfo(outputInfo); + groupOutputInfo.SetShape(groupOutputShape); + + const unsigned int weightsDataTypeSize = GetDataTypeSize(groupWeightsInfo.GetDataType()); + const unsigned int biasesDataTypeSize = GetDataTypeSize(groupBiasesInfo.GetDataType()); + + std::vector convLayers(numGroups * channelMultiplier, nullptr); + for (unsigned int group = 0u; group < numGroups; ++group) + { + for (unsigned int m = 0u; m < channelMultiplier; ++m) + { + auto index = group * channelMultiplier + m; + + const unsigned int weightsDataOffset = groupWeightsShape.GetNumElements() * index * weightsDataTypeSize; + const unsigned int biasesDataOffset = groupBiasesShape.GetNumElements() * index * biasesDataTypeSize; + + if (weightsInfo.HasPerAxisQuantization()) + { + // Extract per-axis quantization scales for group weights + const std::vector& weightsQuantScales = weightsInfo.GetQuantizationScales(); + groupWeightsInfo.SetQuantizationScales( + std::vector(weightsQuantScales.begin() + index, + weightsQuantScales.begin() + index + groupWeightsShape[0])); + + // Extract per-axis quantization scales for group biases + const 
std::vector& biasesQuantScales = biasesInfo.GetQuantizationScales(); + groupBiasesInfo.SetQuantizationScales( + std::vector(biasesQuantScales.begin() + index, + biasesQuantScales.begin() + index + groupWeightsShape[0])); + } + + // Extract weights and biases data for current group convolution + ConstTensor groupWeights(groupWeightsInfo, + static_cast(reinterpret_cast(weights.GetMemoryArea()) + + weightsDataOffset)); + ConstTensor groupBiases(groupBiasesInfo, + static_cast(reinterpret_cast(biases.GetMemoryArea()) + + biasesDataOffset)); + + isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsConvolution2dSupported, + data.m_Backends, + isSupported, + groupInputInfo, + groupOutputInfo, + desc, + groupWeightsInfo, + Optional(groupBiasesInfo)); + if (!isSupported) + { + return false; + } + + IConnectableLayer* convLayer = + data.m_Network->AddConvolution2dLayer(desc, groupWeights, Optional(groupBiases)); + if (!convLayer) + { + return Fail("%s: AddConvolution2dLayer failed", __func__); + } + + splitterLayer->GetOutputSlot(group).Connect(convLayer->GetInputSlot(0)); + convLayer->GetOutputSlot(0).SetTensorInfo(groupOutputInfo); + + convLayers[index] = convLayer; + } + } + + // + // Set up Concat layer + // + ConcatDescriptor concatDescriptor(outputInfo.GetShape()[channelsIndex]); + for (unsigned int group = 0u; group < numGroups; ++group) + { + for (unsigned int m = 0u; m < channelMultiplier; ++m) + { + auto index = group * channelMultiplier + m; + concatDescriptor.SetViewOriginCoord(index, channelsIndex, index); + concatDescriptor.SetConcatAxis(channelsIndex); + } + } + + isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsConcatSupported, + data.m_Backends, + isSupported, + std::vector(numGroups * channelMultiplier, &groupOutputInfo), + outputInfo, + concatDescriptor); + if (!isSupported) + { + return false; + } + + IConnectableLayer* concatLayer = data.m_Network->AddConcatLayer(concatDescriptor); + if (!concatLayer) + { + return Fail("%s: AddConcatLayer failed", __func__); + } + + for (unsigned int group = 0u; group < numGroups; ++group) + { + for (unsigned int m = 0u; m < channelMultiplier; ++m) + { + auto index = group * channelMultiplier + m; + convLayers[index]->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(index)); + } + } + concatLayer->GetOutputSlot(0).SetTensorInfo(outputInfo); + + // + // Set up Activation layer (if it is set) + // + IConnectableLayer* endLayer = ProcessActivation(outputInfo, activation, concatLayer, data); + if (!endLayer) + { + return Fail("%s: ProcessActivation failed", __func__); + } + + return SetupAndTrackLayerOutputSlot(operation, 0, *endLayer, model, data); +} + +template +bool ConvertInstanceNormalization(const HalOperation& operation, const HalModel& model, ConversionData& data) +{ + using HalOperand = typename HalPolicy::Operand; + using HalOperandType = typename HalPolicy::OperandType; + + ALOGV("HalPolicy::ConvertInstanceNormalization()"); + + LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); + if (!input.IsValid()) + { + return Fail("%s: Operation has an invalid input 0", __func__); + } + + const HalOperand* output = GetOutputOperand(operation, 0, model); + if (!output) + { + return Fail("%s: Operation has an invalid output", __func__); + } + + const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); + if (IsDynamicTensor(outputInfo)) + { + return Fail("%s: Dynamic output tensors are not supported", __func__); + } + + // Determine data type of input tensor + HalOperandType inputType; + if 
(!GetOperandType(operation, 0, model, inputType)) + { + return Fail("%s: Operation has invalid inputs", __func__); + } + + InstanceNormalizationDescriptor desc; + + // Read gamma, beta & epsilon + if (inputType == HalOperandType::TENSOR_FLOAT16) + { + Half fp16Gamma; + Half fp16Beta; + Half fp16Epsilon; + + if (!GetInputScalar(operation, 1, HalOperandType::FLOAT16, fp16Gamma, model, data) || + !GetInputScalar(operation, 2, HalOperandType::FLOAT16, fp16Beta, model, data) || + !GetInputScalar(operation, 3, HalOperandType::FLOAT16, fp16Epsilon, model, data)) + { + return Fail("%s: Operation has invalid inputs (FLOAT16)", __func__); + } + + desc.m_Gamma = static_cast(fp16Gamma); + desc.m_Beta = static_cast(fp16Beta); + desc.m_Eps = static_cast(fp16Epsilon); + } + else if (inputType == HalOperandType::TENSOR_FLOAT32) + { + if (!GetInputScalar(operation, 1, HalOperandType::FLOAT32, desc.m_Gamma, model, data) || + !GetInputScalar(operation, 2, HalOperandType::FLOAT32, desc.m_Beta, model, data) || + !GetInputScalar(operation, 3, HalOperandType::FLOAT32, desc.m_Eps, model, data)) + { + return Fail("%s: Operation has invalid inputs (FLOAT32)", __func__); + } + } + else + { + return Fail("%s: Unsupported input tensor type: %d", __func__, inputType); + } + + desc.m_DataLayout = OptionalDataLayout(operation, 4, model, data); + + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsInstanceNormalizationSupported, + data.m_Backends, + isSupported, + input.GetTensorInfo(), + outputInfo, + desc); + if (!isSupported) + { + return false; + } + + IConnectableLayer* layer = data.m_Network->AddInstanceNormalizationLayer(desc); + input.Connect(layer->GetInputSlot(0)); + + return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); +} + +template +bool ConvertLogSoftmax(const HalOperation& operation, const HalModel& model, ConversionData& data) +{ + using HalOperand = typename HalPolicy::Operand; + using HalOperandType = typename HalPolicy::OperandType; + + ALOGV("HalPolicy::ConvertLogSoftmax()"); + + LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); + if (!input.IsValid()) + { + return Fail("%s: Failed to read input 0", __func__); + } + + const HalOperand* output = GetOutputOperand(operation, 0, model); + if (!output) + { + return Fail("%s: Failed to read output", __func__); + } + + const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); + if (IsDynamicTensor(outputInfo)) + { + return Fail("%s: Dynamic output tensors are not supported", __func__); + } + + // Determine data type of input tensor + HalOperandType inputType; + if (!GetOperandType(operation, 0, model, inputType)) + { + return Fail("%s: Operation has invalid inputs", __func__); + } + + LogSoftmaxDescriptor descriptor; + + // Read beta + if (inputType == HalOperandType::TENSOR_FLOAT16) + { + Half fp16Beta; + if (!GetInputScalar(operation, 1, HalOperandType::FLOAT16, fp16Beta, model, data)) + { + return Fail("%s: Failed to read input 1 (FLOAT16)", __func__); + } + + descriptor.m_Beta = static_cast(fp16Beta); + } + else if (inputType == HalOperandType::TENSOR_FLOAT32) + { + if (!GetInputScalar(operation, 1, HalOperandType::FLOAT32, descriptor.m_Beta, model, data)) + { + return Fail("%s: Failed to read input 1 (FLOAT32)", __func__); + } + } + else + { + return Fail("%s: Unsupported input tensor type: %d", __func__, inputType); + } + + // Read axis + if (!GetInputInt32(operation, 2, descriptor.m_Axis, model, data)) + { + return Fail("%s: Failed to read input 2", __func__); + } + + bool 
isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsLogSoftmaxSupported, + data.m_Backends, + isSupported, + input.GetTensorInfo(), + outputInfo, + descriptor); + if (!isSupported) + { + return false; + } + + IConnectableLayer* layer = data.m_Network->AddLogSoftmaxLayer(descriptor); + if (!layer) + { + return Fail("%s: AddLogSoftmaxLayer() returned nullptr", __func__); + } + + input.Connect(layer->GetInputSlot(0)); + + return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); +} + +template +bool ConvertMaximum(const HalOperation& operation, const HalModel& model, ConversionData& data) +{ + using HalOperand = typename HalPolicy::Operand; + + ALOGV("HalPolicy::ConvertMaximum()"); + + LayerInputHandle input0 = ConvertToLayerInputHandle(operation, 0, model, data); + LayerInputHandle input1 = ConvertToLayerInputHandle(operation, 1, model, data); + + if (!input0.IsValid() || !input1.IsValid()) + { + return Fail("%s: Operation has invalid inputs", __func__); + } + + const HalOperand* outputOperand = GetOutputOperand(operation, 0, model); + if (!outputOperand) + { + return Fail("%s: Could not read output", __func__); + } + + const TensorInfo& outInfo = GetTensorInfoForOperand(*outputOperand); + if (IsDynamicTensor(outInfo)) + { + return Fail("%s: Dynamic output tensors are not supported", __func__); + } + + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsMaximumSupported, + data.m_Backends, + isSupported, + input0.GetTensorInfo(), + input1.GetTensorInfo(), + outInfo); + + if (!isSupported) + { + return false; + } + + IConnectableLayer* layer = data.m_Network->AddMaximumLayer(); + assert(layer != nullptr); + bool isReshapeSupported = BroadcastTensor(input0, input1, layer, data); + if (!isReshapeSupported) + { + return false; + } + + return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); +} + +template +bool ConvertMinimum(const HalOperation& operation, const HalModel& model, ConversionData& data) +{ + using HalOperand = typename HalPolicy::Operand; + + ALOGV("HalPolicy::ConvertMinimum()"); + + LayerInputHandle input0 = ConvertToLayerInputHandle(operation, 0, model, data); + LayerInputHandle input1 = ConvertToLayerInputHandle(operation, 1, model, data); + + if (!input0.IsValid() || !input1.IsValid()) + { + return Fail("%s: Operation has invalid inputs", __func__); + } + + const HalOperand* output = GetOutputOperand(operation, 0, model); + if (!output) + { + return Fail("%s: Could not read output 0", __func__); + } + + const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); + if (IsDynamicTensor(outputInfo)) + { + return Fail("%s: Dynamic output tensors are not supported", __func__); + } + + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsMinimumSupported, + data.m_Backends, + isSupported, + input0.GetTensorInfo(), + input1.GetTensorInfo(), + outputInfo); + + if (!isSupported) + { + return false; + } + + IConnectableLayer* const layer = data.m_Network->AddMinimumLayer(); + assert(layer != nullptr); + bool isReshapeSupported = BroadcastTensor(input0, input1, layer, data); + if (!isReshapeSupported) + { + return false; + } + + return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); +} + +template +bool ConvertPadV2(const HalOperation& operation, const HalModel& model, ConversionData& data) +{ + using HalOperand = typename HalPolicy::Operand; + using HalOperandType = typename HalPolicy::OperandType; + + ALOGV("HalPolicy::ConvertPadV2()"); + + LayerInputHandle input = 
ConvertToLayerInputHandle(operation, 0, model, data); + if (!input.IsValid()) + { + return Fail("%s: Could not read input 0", __func__); + } + + const HalOperand* output = GetOutputOperand(operation, 0, model); + if (!output) + { + return Fail("%s: Could not read output", __func__); + } + + const TensorInfo& inputInfo = input.GetTensorInfo(); + unsigned int rank = inputInfo.GetNumDimensions(); + + PadDescriptor descriptor; + if (!ConvertPaddings(operation, model, data, rank, descriptor)) + { + return Fail("%s: Could not convert paddings", __func__); + } + + const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); + if (IsDynamicTensor(outputInfo)) + { + return Fail("%s: Dynamic output tensors are not supported", __func__); + } + + // Determine type of padding value + HalOperandType operandType0; + HalOperandType operandType2; + + if (!GetOperandType(operation, 0, model, operandType0) || + !GetOperandType(operation, 2, model, operandType2)) + { + return Fail("%s: Operation has invalid inputs", __func__); + } + + // Read value to use for padding + if (operandType0 == HalOperandType::TENSOR_FLOAT16 && operandType2 == HalOperandType::FLOAT16) + { + Half f16PadValue; + if (!GetInputScalar(operation, 2, operandType2, f16PadValue, model, data)) + { + return Fail("%s: Could not read input 2 (FLOAT16)", __func__); + } + + descriptor.m_PadValue = f16PadValue; + } + else if (operandType0 == HalOperandType::TENSOR_FLOAT32 && operandType2 == HalOperandType::FLOAT32) + { + if (!GetInputFloat32(operation, 2, descriptor.m_PadValue, model, data)) + { + return Fail("%s: Could not read input 2 (FLOAT32)", __func__); + } + } + else if (operandType0 == HalOperandType::TENSOR_QUANT8_ASYMM && operandType2 == HalOperandType::INT32) + { + int32_t intPadValue = 0; + if (!GetInputInt32(operation, 2, intPadValue, model, data)) + { + return Fail("%s: Could not read input 2 (INT32)", __func__); + } + descriptor.m_PadValue = intPadValue; + } + else + { + return Fail("%s: Operation has invalid inputs: type mismatch", __func__); + } + + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsPadSupported, + data.m_Backends, + isSupported, + inputInfo, + outputInfo, + descriptor); + if (!isSupported) + { + return false; + } + + IConnectableLayer* const layer = data.m_Network->AddPadLayer(descriptor); + assert(layer != nullptr); + input.Connect(layer->GetInputSlot(0)); + layer->GetOutputSlot(0).SetTensorInfo(outputInfo); + + return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); +} + +template +bool ConvertPrelu(const HalOperation& operation, const HalModel& model, ConversionData& data) +{ + using HalOperand = typename HalPolicy::Operand; + + ALOGV("HalPolicy::ConvertPrelu()"); + + LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); + LayerInputHandle alpha = ConvertToLayerInputHandle(operation, 1, model, data); + + if (!input.IsValid() || !alpha.IsValid()) + { + return Fail("%s: Operation has invalid inputs", __func__); + } + + const HalOperand* output = GetOutputOperand(operation, 0, model); + + if (!output) + { + return Fail("%s: Could not read output", __func__); + } + + const TensorInfo& inputInfo = input.GetTensorInfo(); + const TensorInfo& alphaInfo = alpha.GetTensorInfo(); + const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); + + if (IsDynamicTensor(outputInfo)) + { + return Fail("%s: Dynamic output tensors are not supported", __func__); + } + + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsPreluSupported, + 
data.m_Backends, + isSupported, + inputInfo, + alphaInfo, + outputInfo); + if (!isSupported) + { + return false; + } + + IConnectableLayer* const layer = data.m_Network->AddPreluLayer(); + + if (!layer) + { + return Fail("%s: AddPreluLayer failed", __func__); + } + + bool isReshapeSupported = BroadcastTensor(input, alpha, layer, data); + if (!isReshapeSupported) + { + return false; + } + + return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); +} + +template +bool ConvertQuantize(const HalOperation& operation, const HalModel& model, ConversionData& data) +{ + using HalOperand = typename HalPolicy::Operand; + + ALOGV("HalPolicy::ConvertQuantize()"); + + LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); + if (!input.IsValid()) + { + return Fail("%s: Operation has invalid input", __func__); + } + + const HalOperand* const outputOperand = GetOutputOperand(operation, 0, model); + if (!outputOperand) + { + return Fail("%s: Operation has invalid outputs", __func__); + } + + const TensorInfo& outputInfo = GetTensorInfoForOperand(*outputOperand); + if (IsDynamicTensor(outputInfo)) + { + return Fail("%s: Dynamic output tensors are not supported", __func__); + } + + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsQuantizeSupported, + data.m_Backends, + isSupported, + input.GetTensorInfo(), + outputInfo); + if (!isSupported) + { + return false; + } + + IConnectableLayer* const layer = data.m_Network->AddQuantizeLayer(); + assert(layer != nullptr); + input.Connect(layer->GetInputSlot(0)); + + return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); +} + +template +bool ConvertQuantizedLstm(const HalOperation& operation, const HalModel& model, ConversionData& data) +{ + using HalOperand = typename HalPolicy::Operand; + + ALOGV("HalPolicy::ConvertQuantizedLstm()"); + + //Inputs: + // 0: The input: A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape [numBatches, inputSize] + // specifying the input to the LSTM cell. Tensor is quantized with a fixed quantization range of -1, 127/128. + LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); + if (!input.IsValid()) + { + return Fail("%s: Could not read input 0: input", __func__); + } + + //13: The previous cell state: A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT16_SYMM and shape + // [numBatches, outputSize] specifying the cell state from the previous time step of the LSTM cell. + // It is quantized using a quantization range of -2^4, 2^4 * 32767/32768. + LayerInputHandle previousCellStateIn = ConvertToLayerInputHandle(operation, 13, model, data); + if (!previousCellStateIn.IsValid()) + { + return Fail("%s: Could not read input 13: previousCellStateIn", __func__); + } + + // 14: The previous output state: A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape + // [numBathes, outputSize] specifying the output of the LSTM cell from previous time-step. Tensor + // is quantized with a fixed quantization range of -1, 127/128. + LayerInputHandle previousOutputIn = ConvertToLayerInputHandle(operation, 14, model, data); + if (!previousOutputIn.IsValid()) + { + return Fail("%s: Could not read input 14: previousOutputIn", __func__); + } + + // Get the input tensors: + // 1: The input-to-input weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape + // [outputSize, inputSize] specifying input-to-input part of weights for fully-connected layer inside the + // LSTM cell. 
Quantization zero point and scale must be the same across all the weights. + const ConstTensorPin inputToInputWeightsPin = + ConvertOperationInputToConstTensorPin(operation, 1, model, data); + + // 2: The input-to-forget weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape + // [outputSize, inputSize] specifying input-to-forget part of weights for fully-connected layer inside the + // LSTM cell. Quantization zero point and scale must be the same across all the weights. + const ConstTensorPin inputToForgetWeightsPin = + ConvertOperationInputToConstTensorPin(operation, 2, model, data); + + // 3: The input-to-cell weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape + // [outputSize, inputSize] specifying input-to-cell part of weights for fully-connected layer inside the + // LSTM cell. Quantization zero point and scale must be the same across all the weights. + const ConstTensorPin inputToCellWeightsPin = + ConvertOperationInputToConstTensorPin(operation, 3, model, data); + + // 4: The input-to-output weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape + // [outputSize, inputSize] specifying input-to-output part of weights for fully-connected layer inside the + // LSTM cell. Quantization zero point and scale must be the same across all the weights. + const ConstTensorPin inputToOutputWeightsPin = + ConvertOperationInputToConstTensorPin(operation, 4, model, data); + + // 5: The recurrent-to-input weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape + // [outputSize, outputSize] specifying recurrent-to-input part of weights for fully-connected layer inside + // the LSTM cell. Quantization zero point and scale must be the same across all the weights. + const ConstTensorPin recurrentToInputWeightsPin = + ConvertOperationInputToConstTensorPin(operation, 5, model, data); + + // 6: The recurrent-to-forget weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape + // [outputSize, outputSize] specifying recurrent-to-forget part of weights for fully-connected layer inside + // the LSTM cell. Quantization zero point and scale must be the same across all the weights. + const ConstTensorPin recurrentToForgetWeightsPin = + ConvertOperationInputToConstTensorPin(operation, 6, model, data); + + // 7: The recurrent-to-cell weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape + // [outputSize, outputSize] specifying recurrent-to-cell part of weights for fully-connected layer inside + // the LSTM cell. Quantization zero point and scale must be the same across all the weights. + const ConstTensorPin recurrentToCellWeightsPin = + ConvertOperationInputToConstTensorPin(operation, 7, model, data); + + // 8: The recurrent-to-output weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape + // [outputSize, outputSize] specifying recurrent-to-output part of weights for fully-connected layer inside + // the LSTM cell. Quantization zero point and scale must be the same across all the weights. + const ConstTensorPin recurrentToOutputWeightsPin = + ConvertOperationInputToConstTensorPin(operation, 8, model, data); + + // 9: The input gate bias. A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32 and shape [outputSize] specifying the + // bias for the fully-connected layer inside the LSTM cell. Bias is quantized with scale being a product + // of input and weights scales and zeroPoint equal to 0. 
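    // A minimal cross-check sketch (an illustration assuming the pin accessors shown in this
    // file, not the driver's validation logic): the INT32 gate bias scale described above can
    // be recomputed from the converted tensors as
    //     input.GetTensorInfo().GetQuantizationScale() *
    //     inputToInputWeightsPin.GetConstTensor().GetInfo().GetQuantizationScale()
    // with a zero point of 0.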
+ const ConstTensorPin inputGateBiasPin = + ConvertOperationInputToConstTensorPin(operation, 9, model, data); + + // 10: The forget gate bias. A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32 and shape [outputSize] specifying + // the bias for the fully-connected layer inside the LSTM cell. Bias is quantized with scale being a product + // of input and weights scales and zeroPoint equal to 0. + const ConstTensorPin forgetGateBiasPin = + ConvertOperationInputToConstTensorPin(operation, 10, model, data); + + // 11:The cell bias. A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32 and shape [outputSize] specifying the bias + // for the fully-connected layer inside the LSTM cell. Bias is quantized with scale being a product of input + // and weights scales and zeroPoint equal to 0. + const ConstTensorPin cellBiasPin = + ConvertOperationInputToConstTensorPin(operation, 11, model, data); + + // 12:The output gate bias. A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32 and shape [outputSize] specifying + // the bias for the fully-connected layer inside the LSTM cell. Bias is quantized with scale being a product + // of input and weights scales and zeroPoint equal to 0. + const ConstTensorPin outputGateBiasPin = + ConvertOperationInputToConstTensorPin(operation, 12, model, data); + + if (!inputToInputWeightsPin.IsValid() || + !inputToForgetWeightsPin.IsValid() || + !inputToCellWeightsPin.IsValid() || + !inputToOutputWeightsPin.IsValid() || + !recurrentToInputWeightsPin.IsValid() || + !recurrentToForgetWeightsPin.IsValid() || + !recurrentToCellWeightsPin.IsValid() || + !recurrentToOutputWeightsPin.IsValid() || + !inputGateBiasPin.IsValid() || + !forgetGateBiasPin.IsValid() || + !cellBiasPin.IsValid() || + !outputGateBiasPin.IsValid()) + { + return Fail("%s: Operation has invalid tensor inputs", __func__); + } + + // Outputs: + // 0: The cell state: A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT16_SYMM and shape [numBatches, outputSize] + // which contains a cell state from the current time step. Tensor is quantized using a quantization range + // of -2^4, 2^4 * 32767/32768. + const HalOperand* cellStateOut = GetOutputOperand(operation, 0, model); + if (!cellStateOut) + { + return Fail("%s: Could not read output 0: cellStateOut", __func__); + } + + // 1: The output: A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape [numBathes, outputSize] which + // contains the output value. Tensor is quantized with a fixed quantization range of -1, 127/128. 
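    // Stated here as an illustration of the fixed ranges quoted above: the QUANT16_SYMM cell
    // state implies scale 1/2048 (2^-11) with zero point 0, and the QUANT8_ASYMM output
    // implies scale 1/128 with zero point 128.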
+ const HalOperand* output = GetOutputOperand(operation, 1, model); + if (!output) + { + return Fail("%s: Could not read output 1: output", __func__); + } + + // Inputs + const TensorInfo& inputInfo = input.GetTensorInfo(); + const TensorInfo& previousCellStateInInfo = previousCellStateIn.GetTensorInfo(); + const TensorInfo& previousOutputInInfo = previousOutputIn.GetTensorInfo(); + + // Outputs + const TensorInfo& cellStateOutInfo = GetTensorInfoForOperand(*cellStateOut); + const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); + + // Dynamic tensors currently not supported + if (IsDynamicTensor(cellStateOutInfo) || IsDynamicTensor(outputInfo)) + { + return Fail("%s: Dynamic output tensors are not supported", __func__); + } + + QuantizedLstmInputParams params; + + params.m_InputToInputWeights = inputToInputWeightsPin.GetConstTensorPtr(); + params.m_InputToForgetWeights = inputToForgetWeightsPin.GetConstTensorPtr(); + params.m_InputToCellWeights = inputToCellWeightsPin.GetConstTensorPtr(); + params.m_InputToOutputWeights = inputToOutputWeightsPin.GetConstTensorPtr(); + params.m_RecurrentToInputWeights = recurrentToInputWeightsPin.GetConstTensorPtr(); + params.m_RecurrentToForgetWeights = recurrentToForgetWeightsPin.GetConstTensorPtr(); + params.m_RecurrentToCellWeights = recurrentToCellWeightsPin.GetConstTensorPtr(); + params.m_RecurrentToOutputWeights = recurrentToOutputWeightsPin.GetConstTensorPtr(); + params.m_InputGateBias = inputGateBiasPin.GetConstTensorPtr(); + params.m_ForgetGateBias = forgetGateBiasPin.GetConstTensorPtr(); + params.m_CellBias = cellBiasPin.GetConstTensorPtr(); + params.m_OutputGateBias = outputGateBiasPin.GetConstTensorPtr(); + + QuantizedLstmInputParamsInfo paramsInfo; + paramsInfo.m_InputToInputWeights = &(params.m_InputToInputWeights->GetInfo()); + paramsInfo.m_InputToForgetWeights = &(params.m_InputToForgetWeights->GetInfo()); + paramsInfo.m_InputToCellWeights = &(params.m_InputToCellWeights->GetInfo()); + paramsInfo.m_InputToOutputWeights = &(params.m_InputToOutputWeights->GetInfo()); + paramsInfo.m_RecurrentToInputWeights = &(params.m_RecurrentToInputWeights->GetInfo()); + paramsInfo.m_RecurrentToForgetWeights = &(params.m_RecurrentToForgetWeights->GetInfo()); + paramsInfo.m_RecurrentToCellWeights = &(params.m_RecurrentToCellWeights->GetInfo()); + paramsInfo.m_RecurrentToOutputWeights = &(params.m_RecurrentToOutputWeights->GetInfo()); + paramsInfo.m_InputGateBias = &(params.m_InputGateBias->GetInfo()); + paramsInfo.m_ForgetGateBias = &(params.m_ForgetGateBias->GetInfo()); + paramsInfo.m_CellBias = &(params.m_CellBias->GetInfo()); + paramsInfo.m_OutputGateBias = &(params.m_OutputGateBias->GetInfo()); + + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsQuantizedLstmSupported, + data.m_Backends, + isSupported, + inputInfo, + previousCellStateInInfo, + previousOutputInInfo, + cellStateOutInfo, + outputInfo, + paramsInfo); + + if (!isSupported) + { + return false; + } + + IConnectableLayer* const layer = data.m_Network->AddQuantizedLstmLayer(params, "QuantizedLstm"); + input.Connect(layer->GetInputSlot(0)); + previousCellStateIn.Connect(layer->GetInputSlot(1)); + previousOutputIn.Connect(layer->GetInputSlot(2)); + + return (SetupAndTrackLayerOutputSlot(operation, 0, *layer, 0, model, data) && + SetupAndTrackLayerOutputSlot(operation, 1, *layer, 1, model, data)); +} + +template +bool ConvertResize(const HalOperation& operation, + const HalModel& model, + ConversionData& data, + ResizeMethod resizeMethod) +{ + using HalOperand = 
typename HalPolicy::Operand; + using HalOperandType = typename HalPolicy::OperandType; + ALOGV("HalPolicy::ConvertResize()"); + ALOGV("resizeMethod = %s", GetResizeMethodAsCString(resizeMethod)); + + LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); + if (!input.IsValid()) + { + return Fail("%s: Could not read input 0", __func__); + } + + const HalOperand* output = GetOutputOperand(operation, 0, model); + if (!output) + { + return Fail("%s: Could not read output 0", __func__); + } + + const TensorInfo& inputInfo = input.GetTensorInfo(); + const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); + + if (IsDynamicTensor(outputInfo)) + { + return Fail("%s: Dynamic output tensors are not supported", __func__); + } + + ResizeDescriptor descriptor; + descriptor.m_Method = resizeMethod; + descriptor.m_DataLayout = OptionalDataLayout(operation, 3, model, data); + + HalOperandType operandType1; + HalOperandType operandType2; + + if (!GetOperandType(operation, 1, model, operandType1) || + !GetOperandType(operation, 2, model, operandType2)) + { + return Fail("%s: Operation has invalid inputs", __func__); + } + + if (operandType1 != operandType2) + { + return Fail("%s: Operation has invalid inputs. Type of input 1 and 2 should be the same", __func__); + } + + if (operandType1 == HalOperandType::INT32) + { + // Case 1: resizing by shape + int32_t targetWidth = 0; + int32_t targetHeight = 0; + + if (!GetInputInt32(operation, 1, targetWidth, model, data) || + !GetInputInt32(operation, 2, targetHeight, model, data)) + { + return Fail("%s: Operation has invalid inputs for resizing by shape", __func__); + } + + if (targetWidth < 0 || targetHeight < 0) + { + return Fail("%s: Operation has invalid inputs for resizing by shape. " + "Target width/height cannot be < 0", __func__); + } + + descriptor.m_TargetWidth = static_cast(targetWidth); + descriptor.m_TargetHeight = static_cast(targetHeight); + } + else if (operandType1 == HalOperandType::FLOAT32) + { + // Case 2: resizing by scale + float widthScale = 1.0f; + float heightScale = 1.0f; + + if (!GetInputFloat32(operation, 1, widthScale, model, data) || + !GetInputFloat32(operation, 2, heightScale, model, data)) + { + return Fail("%s: Operation has invalid inputs for resizing by scale", __func__); + } + + const TensorShape& inputShape = inputInfo.GetShape(); + armnnUtils::DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout); + + float width = inputShape[dataLayoutIndexed.GetWidthIndex()]; + float height = inputShape[dataLayoutIndexed.GetHeightIndex()]; + + descriptor.m_TargetWidth = std::floor(width * widthScale); + descriptor.m_TargetHeight = std::floor(height * heightScale); + } + else if (operandType1 == HalOperandType::FLOAT16) + { + Half widthScale; + Half heightScale; + + if (!GetInputScalar(operation, 1, HalOperandType::FLOAT16, widthScale, model, data) || + !GetInputScalar(operation, 2, HalOperandType::FLOAT16, heightScale, model, data)) + { + return Fail("%s: Operation has invalid inputs for resizing by scale", __func__); + } + + const TensorShape& inputShape = inputInfo.GetShape(); + armnnUtils::DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout); + + Half width = static_cast(inputShape[dataLayoutIndexed.GetWidthIndex()]); + Half height = static_cast(inputShape[dataLayoutIndexed.GetHeightIndex()]); + + descriptor.m_TargetWidth = std::floor(width * widthScale); + descriptor.m_TargetHeight = std::floor(height * heightScale); + } + else + { + return Fail("%s: Operand has invalid data type for 
resizing by scale", __func__); + } + + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsResizeSupported, + data.m_Backends, + isSupported, + inputInfo, + outputInfo, + descriptor); + + if (!isSupported) + { + return false; + } + + IConnectableLayer* layer = data.m_Network->AddResizeLayer(descriptor); + + assert(layer != nullptr); + + input.Connect(layer->GetInputSlot(0)); + + return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); +} + +template +bool ConvertSpaceToDepth(const HalOperation& operation, const HalModel& model, ConversionData& data) +{ + using HalOperand = typename HalPolicy::Operand; + using HalOperandType = typename HalPolicy::OperandType; + + ALOGV("HalPolicy::ConvertSpaceToDepth()"); + + LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); + if (!input.IsValid() ) + { + return Fail("%s: Operation has invalid inputs", __func__); + } + + const TensorInfo& inputInfo = input.GetTensorInfo(); + unsigned int rank = inputInfo.GetNumDimensions(); + if (rank != 4) + { + return Fail("%s: Only inputs with rank 4 are supported", __func__); + } + + const HalOperand* output = GetOutputOperand(operation, 0, model); + if (!output) + { + return Fail("%s: Could not read output 0", __func__); + } + + const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); + if (IsDynamicTensor(outputInfo)) + { + return Fail("%s: Dynamic output tensors are not supported", __func__); + } + + SpaceToDepthDescriptor desc; + + GetInputScalar(operation, 1, HalOperandType::INT32, desc.m_BlockSize, model, data); + + if (desc.m_BlockSize <= 1) + { + return Fail("%s: Block size must be at least 1 in all dimensions"); + } + + desc.m_DataLayout = OptionalDataLayout(operation, 2, model, data); + + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsSpaceToDepthSupported, + data.m_Backends, + isSupported, + inputInfo, + outputInfo, + desc); + if (!isSupported) + { + return false; + } + + IConnectableLayer* const layer = data.m_Network->AddSpaceToDepthLayer(desc); + assert(layer != nullptr); + input.Connect(layer->GetInputSlot(0)); + + return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); +} + +template +bool ConvertSoftmax(const HalOperation& operation, const HalModel& model, ConversionData& data) +{ + using HalOperand = typename HalPolicy::Operand; + using HalOperandType = typename HalPolicy::OperandType; + + ALOGV("HalPolicy::ConvertSoftmax()"); + + LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); + if (!input.IsValid()) + { + return Fail("%s: Operation has invalid inputs", __func__); + } + + const HalOperand* outputOperand = GetOutputOperand(operation, 0, model); + if (!outputOperand) + { + return Fail("%s: Operation has no outputs", __func__); + } + + const TensorInfo& outputInfo = GetTensorInfoForOperand(*outputOperand); + if (IsDynamicTensor(outputInfo)) + { + return Fail("%s: Dynamic output tensors are not supported", __func__); + } + + SoftmaxDescriptor desc; + if (!GetInputFloat32(operation, 1, desc.m_Beta, model, data)) + { + return Fail("%s: Operation has invalid inputs", __func__); + } + + if (operation.inputs.size() > 2 && !GetInputScalar(operation, + 2, + HalOperandType::INT32, + desc.m_Axis, + model, + data)) + { + return Fail("%s: Operation has invalid inputs", __func__); + } + + if (input.GetTensorInfo().GetNumDimensions() > 2 || + !(desc.m_Axis == 1 || + (desc.m_Axis < 0 && static_cast(input.GetTensorInfo().GetNumDimensions()) + desc.m_Axis == 1))) + { + return Fail("%s: 
Unsupported input greater than 2D or axis != 1", __func__); + } + + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsSoftmaxSupported, + data.m_Backends, + isSupported, + input.GetTensorInfo(), + outputInfo, + desc); + if (!isSupported) + { + return false; + } + + IConnectableLayer* layer = data.m_Network->AddSoftmaxLayer(desc); + assert(layer != nullptr); + input.Connect(layer->GetInputSlot(0)); + + return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data); +} + +template +bool ConvertLstm(const HalOperation& operation, const HalModel& model, ConversionData& data) +{ + using HalOperand = typename HalPolicy::Operand; + using HalOperandType = typename HalPolicy::OperandType; + + ALOGV("HalPolicy::ConvertLstm()"); + + // Inputs: + // 00: The input: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, input_size], where + // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input. + LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); + if (!input.IsValid()) + { + return Fail("%s: Could not read input 0: input", __func__); + } + // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. + LayerInputHandle outputStateIn = ConvertToLayerInputHandle(operation, 18, model, data); + if (!outputStateIn.IsValid()) + { + return Fail("%s: Could not read input 18: outputStateIn", __func__); + } + // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + LayerInputHandle cellStateIn = ConvertToLayerInputHandle(operation, 19, model, data); + if (!cellStateIn.IsValid()) + { + return Fail("%s: Could not read input 19: cellStateIn", __func__); + } + + // Get the mandatory input tensors: + // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. + const ConstTensorPin inputToForgetWeightsPin = + (DequantizeAndMakeConstTensorPin(operation, model, data, 2)); + // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. + const ConstTensorPin inputToCellWeightsPin = + (DequantizeAndMakeConstTensorPin(operation, model, data, 3)); + // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. + const ConstTensorPin inputToOutputWeightsPin = + (DequantizeAndMakeConstTensorPin(operation, model, data, 4)); + // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + const ConstTensorPin recurrentToForgetWeightsPin = + (DequantizeAndMakeConstTensorPin(operation, model, data, 6)); + // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + const ConstTensorPin recurrentToCellWeightsPin = + (DequantizeAndMakeConstTensorPin(operation, model, data, 7)); + // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + const ConstTensorPin recurrentToOutputWeightsPin = + (DequantizeAndMakeConstTensorPin(operation, model, data, 8)); + // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. 
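    // Note on the helpers used for the weights above (an assumption about the ConversionUtils
    // helpers, stated for clarity): DequantizeAndMakeConstTensorPin lets constant QSymm8
    // weights that reach the LSTM through a DEQUANTIZE operation be folded into float
    // constants by the driver, whereas the bias inputs below are read directly as constant
    // tensor pins.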
+ const ConstTensorPin forgetGateBiasPin = + ConvertOperationInputToConstTensorPin(operation, 13, model, data); + // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + const ConstTensorPin cellBiasPin = + ConvertOperationInputToConstTensorPin(operation, 14, model, data); + // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + const ConstTensorPin outputGateBiasPin = + ConvertOperationInputToConstTensorPin(operation, 15, model, data); + + if (!inputToForgetWeightsPin.IsValid() || + !inputToCellWeightsPin.IsValid() || + !inputToOutputWeightsPin.IsValid() || + !recurrentToForgetWeightsPin.IsValid() || + !recurrentToCellWeightsPin.IsValid() || + !recurrentToOutputWeightsPin.IsValid() || + !forgetGateBiasPin.IsValid() || + !cellBiasPin.IsValid() || + !outputGateBiasPin.IsValid()) + { + return Fail("%s: Operation has invalid tensor inputs", __func__); + } + + // Get the optional input tensors: + // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size], where “num_units” corresponds to the number of cell units. + const ConstTensorPin inputToInputWeightsPin = + (DequantizeAndMakeConstTensorPin(operation, model, data, 1, true)); + // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e., + // “num_units”), or the second dimension of the “projection_weights”, if defined. + const ConstTensorPin recurrentToInputWeightsPin = + (DequantizeAndMakeConstTensorPin(operation, model, data, 5, true)); + // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + const ConstTensorPin cellToInputWeightsPin = + (DequantizeAndMakeConstTensorPin(operation, model, data, 9, true)); + // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + const ConstTensorPin cellToForgetWeightsPin = + (DequantizeAndMakeConstTensorPin(operation, model, data, 10, true)); + // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + const ConstTensorPin cellToOutputWeightsPin = + (DequantizeAndMakeConstTensorPin(operation, model, data, 11, true)); + // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + const ConstTensorPin inputGateBiasPin = + ConvertOperationInputToConstTensorPin(operation, + 12, + model, + data, + g_DontPermute, + nullptr, + true); + + // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [output_size, num_units]. + const ConstTensorPin projectionWeightsPin = + (DequantizeAndMakeConstTensorPin(operation, model, data, 16, true)); + // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size]. 
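    // The trailing 'true' on the optional inputs marks the operand as optional (an assumption
    // about the ConstTensorPin helpers, consistent with the validity checks below): an omitted
    // input produces a pin that reports IsOptional() rather than IsValid(), and its
    // GetConstTensorPtr() returns nullptr so the corresponding LstmInputParams entry stays unset.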
+ const ConstTensorPin projectionBiasPin = + ConvertOperationInputToConstTensorPin(operation, + 17, + model, + data, + g_DontPermute, + nullptr, + true); + + if ((!inputToInputWeightsPin.IsValid() && !inputToInputWeightsPin.IsOptional()) || + (!recurrentToInputWeightsPin.IsValid() && !recurrentToInputWeightsPin.IsOptional()) || + (!cellToInputWeightsPin.IsValid() && !cellToInputWeightsPin.IsOptional()) || + (!cellToForgetWeightsPin.IsValid() && !cellToForgetWeightsPin.IsOptional()) || + (!cellToOutputWeightsPin.IsValid() && !cellToOutputWeightsPin.IsOptional()) || + (!inputGateBiasPin.IsValid() && !inputGateBiasPin.IsOptional()) || + (!projectionWeightsPin.IsValid() && !projectionWeightsPin.IsOptional()) || + (!projectionBiasPin.IsValid() && !projectionBiasPin.IsOptional())) + { + return Fail("%s: Operation has invalid tensor inputs", __func__); + } + + // Get the mandatory input scalars (actually 1-D tensors of size 1): + // 20: The activation function: A value indicating the activation function: + // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid. + // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip]. + // If set to 0.0 then clipping is disabled. + // 22: The clipping threshold: for the output from the projection layer, such that values are bound within + // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. + ActivationFn activation; + float cellClip; + float projClip; + if (!GetInputActivationFunctionFromTensor(operation, 20, activation, model, data) || + !GetInputScalar(operation, 21, HalOperandType::FLOAT32, cellClip, model, data) || + !GetInputScalar(operation, 22, HalOperandType::FLOAT32, projClip, model, data)) + { + return Fail("%s: Operation has invalid scalar inputs", __func__); + } + + // Get the normalization tensors + // 23: The input layer normalization weights. A 1-D tensor of shape [num_units]. + // Used to rescale normalized inputs to activation at input gate. + const ConstTensorPin inputLayerNormWeightsPin + (DequantizeAndMakeConstTensorPin(operation, model, data, 23, true)); + + // 24: The forget layer normalization weights. A 1-D tensor of shape [num_units]. + // Used to rescale normalized inputs to activation at forget gate. + const ConstTensorPin forgetLayerNormWeightsPin = + ConvertOperationInputToConstTensorPin(operation, + 24, + model, + data, + g_DontPermute, + nullptr, + true); + + // 25: The cell layer normalization weights. A 1-D tensor of shape [num_units]. + // Used to rescale normalized inputs to activation at cell gate. + const ConstTensorPin cellLayerNormWeightsPin = + ConvertOperationInputToConstTensorPin(operation, + 25, + model, + data, + g_DontPermute, + nullptr, + true); + + // 26: The output layer normalization weights. A 1-D tensor of shape [num_units]. + // Used to rescale normalized inputs to activation at output gate. + const ConstTensorPin outputLayerNormWeightsPin = + ConvertOperationInputToConstTensorPin(operation, + 26, + model, + data, + g_DontPermute, + nullptr, + true); + + // Outputs: + // 00: The scratch buffer: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units * 4] + // with CIFG, or [batch_size, num_units * 3] without CIFG. + const HalOperand* scratchBuffer = GetOutputOperand(operation, 0, model); + if (!scratchBuffer) + { + return Fail("%s: Could not read output 0: scratchBuffer", __func__); + } + // 01: The output state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. 
+ const HalOperand* outputStateOut = GetOutputOperand(operation, 1, model); + if (!outputStateOut) + { + return Fail("%s: Could not read output 1: outputStateOut", __func__); + } + // 02: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + const HalOperand* cellStateOut = GetOutputOperand(operation, 2, model); + if (!cellStateOut) + { + return Fail("%s: Could not read output 2: cellStateOut", __func__); + } + // 03: The output: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. This is + // effectively the same as the current “output state (out)” value. + const HalOperand* output = GetOutputOperand(operation, 3, model); + if (!output) + { + return Fail("%s: Could not read output 3: output", __func__); + } + + // set the params structure for the AddLstmLayer call + LstmInputParams params; + params.m_InputToInputWeights = inputToInputWeightsPin.GetConstTensorPtr(); + params.m_InputToForgetWeights = inputToForgetWeightsPin.GetConstTensorPtr(); + params.m_InputToCellWeights = inputToCellWeightsPin.GetConstTensorPtr(); + params.m_InputToOutputWeights = inputToOutputWeightsPin.GetConstTensorPtr(); + params.m_RecurrentToInputWeights = recurrentToInputWeightsPin.GetConstTensorPtr(); + params.m_RecurrentToForgetWeights = recurrentToForgetWeightsPin.GetConstTensorPtr(); + params.m_RecurrentToCellWeights = recurrentToCellWeightsPin.GetConstTensorPtr(); + params.m_RecurrentToOutputWeights = recurrentToOutputWeightsPin.GetConstTensorPtr(); + params.m_CellToInputWeights = cellToInputWeightsPin.GetConstTensorPtr(); + params.m_CellToForgetWeights = cellToForgetWeightsPin.GetConstTensorPtr(); + params.m_CellToOutputWeights = cellToOutputWeightsPin.GetConstTensorPtr(); + params.m_InputGateBias = inputGateBiasPin.GetConstTensorPtr(); + params.m_ForgetGateBias = forgetGateBiasPin.GetConstTensorPtr(); + params.m_CellBias = cellBiasPin.GetConstTensorPtr(); + params.m_OutputGateBias = outputGateBiasPin.GetConstTensorPtr(); + params.m_ProjectionWeights = projectionWeightsPin.GetConstTensorPtr(); + params.m_ProjectionBias = projectionBiasPin.GetConstTensorPtr(); + params.m_InputLayerNormWeights = inputLayerNormWeightsPin.GetConstTensorPtr(); + params.m_ForgetLayerNormWeights = forgetLayerNormWeightsPin.GetConstTensorPtr(); + params.m_CellLayerNormWeights = cellLayerNormWeightsPin.GetConstTensorPtr(); + params.m_OutputLayerNormWeights = outputLayerNormWeightsPin.GetConstTensorPtr(); + + // set the layer descriptor + LstmDescriptor desc; + desc.m_ActivationFunc = activation; + desc.m_ClippingThresCell = cellClip; + desc.m_ClippingThresProj = projClip; + desc.m_CifgEnabled = (params.m_InputToInputWeights == nullptr || + params.m_RecurrentToInputWeights == nullptr || + params.m_InputGateBias == nullptr); + desc.m_PeepholeEnabled = (params.m_CellToForgetWeights != nullptr || + params.m_CellToOutputWeights != nullptr); + desc.m_ProjectionEnabled = (params.m_ProjectionWeights != nullptr); + desc.m_LayerNormEnabled = (params.m_InputLayerNormWeights != nullptr || + params.m_ForgetLayerNormWeights != nullptr || + params.m_CellLayerNormWeights != nullptr || + params.m_OutputLayerNormWeights != nullptr); + + // validate the optional input groups + if (desc.m_CifgEnabled && + (params.m_InputToInputWeights != nullptr || + params.m_RecurrentToInputWeights != nullptr || + params.m_InputGateBias != nullptr)) + { + return Fail("%s: All, or none, of input-to-input weights, recurrent-to-input weights," + " and input gate bias must be provided", 
__func__); + } + + if (!desc.m_ProjectionEnabled && params.m_ProjectionBias != nullptr) + { + return Fail("%s: projection bias should not be provided without projection weights", __func__); + } + + if (desc.m_PeepholeEnabled && + (params.m_CellToForgetWeights == nullptr || + params.m_CellToOutputWeights == nullptr || + (!desc.m_CifgEnabled && params.m_CellToInputWeights == nullptr))) + { + return Fail("%s: All, or none, of cell-to-forget weights and cell-to-output weights must be provided" + " and, if CIFG is not enabled, cell-to-input weights must also be provided", __func__); + } + + if (desc.m_LayerNormEnabled && + (params.m_ForgetLayerNormWeights == nullptr || + params.m_CellLayerNormWeights == nullptr || + params.m_OutputLayerNormWeights == nullptr || + (!desc.m_CifgEnabled && params.m_InputLayerNormWeights == nullptr))) + { + return Fail("%s: All, or none, of forget-norm weights, cell-norm weights and output-norm weights must be" + " provided and, if CIFG is not enabled, input-norm weights must also be provided", __func__); + } + + // Check if the layer is supported + // Inputs + const TensorInfo& inputInfo = input.GetTensorInfo(); + const TensorInfo& outputStateInInfo = outputStateIn.GetTensorInfo(); + const TensorInfo& cellStateInInfo = cellStateIn.GetTensorInfo(); + + // Outputs + const TensorInfo& scratchBufferInfo = GetTensorInfoForOperand(*scratchBuffer); + const TensorInfo& outputStateOutInfo = GetTensorInfoForOperand(*outputStateOut); + const TensorInfo& cellStateOutInfo = GetTensorInfoForOperand(*cellStateOut); + const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); + + // Check if the scratch buffer shape was initialized, + // In some cases the shape could be (0,0) which requires the driver + // to infer the shape and set it up accordingly. + // The code below does that. 
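    // The inferred shape follows the NNAPI LSTM definition: [batch_size, num_units * 3] when
    // CIFG couples the input gate to the forget gate (only the forget, cell and output gate
    // buffers remain), and [batch_size, num_units * 4] otherwise. For example, num_units = 20
    // and batch_size = 2 give {2, 60} with CIFG and {2, 80} without.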
+ TensorInfo fixSbInfo = scratchBufferInfo; + if (IsDynamicTensor(scratchBufferInfo)) + { + auto & s = fixSbInfo.GetShape(); + s[0] = outputStateInInfo.GetShape()[0]; + if (desc.m_CifgEnabled) + { + // 2D tensor with dimensions [num_units * 3, batch_size] with CIFG + s[1] = cellStateOutInfo.GetShape()[1]*3; + } + else + { + // scratch_buffer [num_units * 4, batch_size] without CIFG + s[1] = cellStateOutInfo.GetShape()[1]*4; + } + } + + if (IsDynamicTensor(outputStateOutInfo) || + IsDynamicTensor(cellStateOutInfo) || + IsDynamicTensor(outputInfo)) + { + return Fail("%s: Dynamic output tensors are not supported %d %d %d %d", __func__, + IsDynamicTensor(scratchBufferInfo), IsDynamicTensor(outputStateOutInfo), + IsDynamicTensor(cellStateOutInfo), IsDynamicTensor(outputInfo)); + } + + // Basic parameters + LstmInputParamsInfo paramsInfo; + paramsInfo.m_InputToForgetWeights = &(params.m_InputToForgetWeights->GetInfo()); + paramsInfo.m_InputToCellWeights = &(params.m_InputToCellWeights->GetInfo()); + paramsInfo.m_InputToOutputWeights = &(params.m_InputToOutputWeights->GetInfo()); + paramsInfo.m_RecurrentToForgetWeights = &(params.m_RecurrentToForgetWeights->GetInfo()); + paramsInfo.m_RecurrentToCellWeights = &(params.m_RecurrentToCellWeights->GetInfo()); + paramsInfo.m_RecurrentToOutputWeights = &(params.m_RecurrentToOutputWeights->GetInfo()); + paramsInfo.m_ForgetGateBias = &(params.m_ForgetGateBias->GetInfo()); + paramsInfo.m_CellBias = &(params.m_CellBias->GetInfo()); + paramsInfo.m_OutputGateBias = &(params.m_OutputGateBias->GetInfo()); + + // Optional parameters + if (!desc.m_CifgEnabled) + { + paramsInfo.m_InputToInputWeights = &(params.m_InputToInputWeights->GetInfo()); + paramsInfo.m_RecurrentToInputWeights = &(params.m_RecurrentToInputWeights->GetInfo()); + if (params.m_CellToInputWeights != nullptr) + { + paramsInfo.m_CellToInputWeights = &(params.m_CellToInputWeights->GetInfo()); + } + paramsInfo.m_InputGateBias = &(params.m_InputGateBias->GetInfo()); + } + + if (desc.m_ProjectionEnabled) + { + paramsInfo.m_ProjectionWeights = &(params.m_ProjectionWeights->GetInfo()); + if (params.m_ProjectionBias != nullptr) + { + paramsInfo.m_ProjectionBias = &(params.m_ProjectionBias->GetInfo()); + } + } + + if (desc.m_PeepholeEnabled) + { + paramsInfo.m_CellToForgetWeights = &(params.m_CellToForgetWeights->GetInfo()); + paramsInfo.m_CellToOutputWeights = &(params.m_CellToOutputWeights->GetInfo()); + } + + if (desc.m_LayerNormEnabled) + { + if(!desc.m_CifgEnabled) + { + paramsInfo.m_InputLayerNormWeights = &(params.m_InputLayerNormWeights->GetInfo()); + } + paramsInfo.m_ForgetLayerNormWeights = &(params.m_ForgetLayerNormWeights->GetInfo()); + paramsInfo.m_CellLayerNormWeights = &(params.m_CellLayerNormWeights->GetInfo()); + paramsInfo.m_OutputLayerNormWeights = &(params.m_OutputLayerNormWeights->GetInfo()); + } + + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsLstmSupported, + data.m_Backends, + isSupported, + inputInfo, + outputStateInInfo, + cellStateInInfo, + fixSbInfo, + outputStateOutInfo, + cellStateOutInfo, + outputInfo, + desc, + paramsInfo); + if (!isSupported) + { + return false; + } + + // Add the layer + IConnectableLayer* layer = data.m_Network->AddLstmLayer(desc, params, "Lstm"); + + input.Connect(layer->GetInputSlot(0)); + outputStateIn.Connect(layer->GetInputSlot(1)); + cellStateIn.Connect(layer->GetInputSlot(2)); + + + return ( + (IsDynamicTensor(scratchBufferInfo)? 
+ SetupAndTrackLayerOutputSlotAndOverrideTensorInfo( + operation, 0, *layer, 0, model, data,fixSbInfo): + SetupAndTrackLayerOutputSlot( + operation, 0, *layer, 0, model, data)) && + SetupAndTrackLayerOutputSlot(operation, 1, *layer, 1, model, data) && + SetupAndTrackLayerOutputSlot(operation, 2, *layer, 2, model, data) && + SetupAndTrackLayerOutputSlot(operation, 3, *layer, 3, model, data)); +} + +template +bool ConvertTransposeConv2d(const HalOperation& operation, const HalModel& model, ConversionData& data) +{ + using HalOperand = typename HalPolicy::Operand; + using HalOperandType = typename HalPolicy::OperandType; + + LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data); + + if (!input.IsValid()) + { + return Fail("%s: Operation has invalid inputs", __func__); + } + + const HalOperand* output = GetOutputOperand(operation, 0, model); + + if (!output) + { + return Fail("%s: Could not read output 0", __func__); + } + + const TensorInfo& inputInfo = input.GetTensorInfo(); + const TensorInfo& outputInfo = GetTensorInfoForOperand(*output); + if (IsDynamicTensor(outputInfo)) + { + return Fail("%s: Dynamic output tensors are not supported", __func__); + } + + // ArmNN does not currently support non-fixed weights or bias + // Find the shape of the weights tensor. In AndroidNN this will be [ 1, H, W, I * M ] + const HalOperand* weightsOperand = GetInputOperand(operation, 1, model); + + if (weightsOperand == nullptr) + { + return Fail("%s: Operand is invalid", __func__); + } + TransposeConvolution2dDescriptor desc; + desc.m_DataLayout = DataLayout::NHWC; + + // Determine whether padding is implicit or explicit + bool implicitPadding = operation.inputs.size() == 9; + + if (implicitPadding ) + { + desc.m_DataLayout = OptionalDataLayout(operation, 8, model, data); + } + else + { + desc.m_DataLayout = OptionalDataLayout(operation, 10, model, data); + } + + armnnUtils::DataLayoutIndexed dataLayoutIndexed(desc.m_DataLayout); + unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex(); + unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex(); + + const PermutationVector OHWIToOIHW = {0, 2, 3, 1}; + + // The shape of the weight is [depth_out, filter_height, filter_width, depth_in]. + // We have to permute it to OIHW if the data layout is NCHW. + const ConstTensorPin weightsPin = (desc.m_DataLayout == DataLayout::NCHW) ? 
+ ConvertOperationInputToConstTensorPin(operation, 1, + model, data, OHWIToOIHW) : + ConvertOperationInputToConstTensorPin(operation, 1, model, data); + + // Bias is a 1D tensor + const ConstTensorPin biasPin = + ConvertOperationInputToConstTensorPin(operation, 2, model, data); + + if (!weightsPin.IsValid()) + { + return Fail("%s: Operation has invalid weights", __func__); + } + + if (!biasPin.IsValid()) + { + return Fail("%s: Operation has invalid biases", __func__); + } + + ConstTensor weights = weightsPin.GetConstTensor(); + ConstTensor bias = biasPin.GetConstTensor(); + SanitizeBiasQuantizationScale(bias.GetInfo(), weights.GetInfo(), inputInfo); + + ActivationFn activation; + + if (implicitPadding) + { + int32_t strideX{0}; + int32_t strideY{0}; + int32_t padLeft{0}; + int32_t padRight{0}; + int32_t padTop{0}; + int32_t padBottom{0}; + + android::nn::PaddingScheme paddingScheme; + if (!GetInputPaddingScheme(operation, 4, paddingScheme, model, data) || + !GetInputScalar(operation, 5, HalOperandType::INT32, strideX, model, data) || + !GetInputScalar(operation, 6, HalOperandType::INT32, strideY, model, data) || + !GetInputActivationFunction(operation, 7, activation, model, data)) + { + return Fail("%s: Operation has invalid inputs (implicit padding)", __func__); + } + + const uint32_t kernelX = weights.GetShape()[widthIndex]; + const uint32_t kernelY = weights.GetShape()[heightIndex]; + const uint32_t outputX = outputInfo.GetShape()[widthIndex]; + const uint32_t outputY = outputInfo.GetShape()[heightIndex]; + + CalcPaddingTransposeConv(outputX, kernelX, strideX, padLeft, padRight, paddingScheme); + CalcPaddingTransposeConv(outputY, kernelY, strideY, padTop, padBottom, paddingScheme); + + // NOTE: The Android NN API allows for negative padding values in TransposeConv2d, + // but Arm NN only supports values >= 0 + if (padLeft < 0 || padRight < 0 || padTop < 0 || padBottom < 0) + { + return Fail("%s: Negative padding values are not supported", __func__); + } + + desc.m_StrideX = boost::numeric_cast(strideX); + desc.m_StrideY = boost::numeric_cast(strideY); + desc.m_PadLeft = boost::numeric_cast(padLeft); + desc.m_PadRight = boost::numeric_cast(padRight); + desc.m_PadTop = boost::numeric_cast(padTop); + desc.m_PadBottom = boost::numeric_cast(padBottom); + } + else if (operation.inputs.size() == 11) + { + // explicit padding + if (!GetInputScalar(operation, 3, HalOperandType::INT32, desc.m_PadLeft, model, data) || + !GetInputScalar(operation, 4, HalOperandType::INT32, desc.m_PadRight, model, data) || + !GetInputScalar(operation, 5, HalOperandType::INT32, desc.m_PadTop, model, data) || + !GetInputScalar(operation, 6, HalOperandType::INT32, desc.m_PadBottom, model, data) || + !GetInputScalar(operation, 7, HalOperandType::INT32, desc.m_StrideX, model, data) || + !GetInputScalar(operation, 8, HalOperandType::INT32, desc.m_StrideY, model, data) || + !GetInputActivationFunction(operation, 9, activation, model, data)) + { + return Fail("%s: Operation has invalid inputs (explicit padding)", __func__); + } + } + else + { + return Fail("%s: Unsupported number of operation inputs", __func__); + } + + desc.m_BiasEnabled = true; + Optional biases(bias.GetInfo()); + + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + IsTransposeConvolution2dSupported, + data.m_Backends, + isSupported, + inputInfo, + outputInfo, + desc, + weights.GetInfo(), + biases); + if (!isSupported) + { + return false; + } + + IConnectableLayer* startLayer = + data.m_Network->AddTransposeConvolution2dLayer(desc, 
weights, Optional(bias)); + if (!startLayer) + { + return Fail("%s: AddTransposeConvolution2dLayer failed", __func__); + } + + IConnectableLayer* endLayer = ProcessActivation(outputInfo, activation, startLayer, data); + if (!endLayer) + { + return Fail("%s: ProcessActivation failed", __func__); + } + + input.Connect(startLayer->GetInputSlot(0)); + + return SetupAndTrackLayerOutputSlot(operation, 0, *endLayer, model, data); +} + +} // armnn_driver namespace \ No newline at end of file diff --git a/ModelToINetworkConverter.cpp b/ModelToINetworkConverter.cpp index 05e60462..24fb4903 100644 --- a/ModelToINetworkConverter.cpp +++ b/ModelToINetworkConverter.cpp @@ -6,8 +6,10 @@ #define LOG_TAG "ArmnnDriver" #include "ModelToINetworkConverter.hpp" +#include "Utils.hpp" #include +#include namespace armnn_driver { @@ -62,21 +64,29 @@ void ModelToINetworkConverter::Convert() // add operations to it // track which layer outputs each operand - m_Data.m_OutputSlotForOperand = std::vector(m_Model.operands.size(), nullptr); - + ALOGV("ModelToINetworkConverter::Convert(): m_OutputSlotForOperand"); + m_Data.m_OutputSlotForOperand = std::vector(getMainModel(m_Model).operands.size(), nullptr); try { - for (uint32_t i = 0; i < m_Model.inputIndexes.size(); i++) + ALOGV("ModelToINetworkConverter::Convert(): for getMainModel(m_Model).inputIndexes.size()"); + for (uint32_t i = 0; i < getMainModel(m_Model).inputIndexes.size(); i++) { + ALOGV("ModelToINetworkConverter::Convert(): getMainModel(m_Model).inputIndexes[i]"); // inputs in android nn are represented by operands - uint32_t inputIndex = m_Model.inputIndexes[i]; - const HalOperand& operand = m_Model.operands[inputIndex]; + uint32_t inputIndex = getMainModel(m_Model).inputIndexes[i]; + ALOGV("ModelToINetworkConverter::Convert(): getMainModel(m_Model).operands[inputIndex];"); + const HalOperand& operand = getMainModel(m_Model).operands[inputIndex]; + ALOGV("ModelToINetworkConverter::Convert(): GetTensorInfoForOperand(operand)"); const armnn::TensorInfo& tensor = GetTensorInfoForOperand(operand); + ALOGV("ModelToINetworkConverter::Convert(): m_Data.m_Network->AddInputLayer(i)"); armnn::IConnectableLayer* layer = m_Data.m_Network->AddInputLayer(i); + ALOGV("ModelToINetworkConverter::Convert(): layer->GetOutputSlot(0)"); armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + ALOGV("ModelToINetworkConverter::Convert(): outputSlot.SetTensorInfo(GetTensorInfoForOperand(operand))"); outputSlot.SetTensorInfo(GetTensorInfoForOperand(operand)); + ALOGV("ModelToINetworkConverter::Convert(): m_Data.m_OutputSlotForOperand[inputIndex] = &outputSlot"); // store for later layers m_Data.m_OutputSlotForOperand[inputIndex] = &outputSlot; } @@ -92,9 +102,9 @@ void ModelToINetworkConverter::Convert() m_ConversionResult = ConversionResult::UnsupportedFeature; } - for (uint32_t operationIdx = 0; operationIdx < m_Model.operations.size(); operationIdx++) + for (uint32_t operationIdx = 0; operationIdx < getMainModel(m_Model).operations.size(); operationIdx++) { - const auto& operation = m_Model.operations[operationIdx]; + const auto& operation = getMainModel(m_Model).operations[operationIdx]; bool ok = true; if (m_ForcedUnsupportedOperations.find(operationIdx) != m_ForcedUnsupportedOperations.end()) @@ -135,11 +145,11 @@ void ModelToINetworkConverter::Convert() { if (m_ConversionResult == ConversionResult::Success) { - for (uint32_t i = 0; i < m_Model.outputIndexes.size(); i++) + for (uint32_t i = 0; i < getMainModel(m_Model).outputIndexes.size(); i++) { // outputs in android nn 
are represented by operands - uint32_t outputIndex = m_Model.outputIndexes[i]; - const HalOperand& operand = m_Model.operands[outputIndex]; + uint32_t outputIndex = getMainModel(m_Model).outputIndexes[i]; + const HalOperand& operand = getMainModel(m_Model).operands[outputIndex]; const armnn::TensorInfo& tensor = GetTensorInfoForOperand(operand); armnn::IConnectableLayer* layer = m_Data.m_Network->AddOutputLayer(i); @@ -178,4 +188,10 @@ template class ModelToINetworkConverter; template class ModelToINetworkConverter; #endif +#ifdef ARMNN_ANDROID_NN_V1_3 +template class ModelToINetworkConverter; +template class ModelToINetworkConverter; +template class ModelToINetworkConverter; +#endif + } // armnn_driver diff --git a/RequestThread.cpp b/RequestThread.cpp index 22a3ac37..50c5161c 100644 --- a/RequestThread.cpp +++ b/RequestThread.cpp @@ -12,6 +12,11 @@ #include "ArmnnPreparedModel_1_2.hpp" #endif +#ifdef ARMNN_ANDROID_NN_V1_3 +#include "ArmnnPreparedModel_1_2.hpp" +#include "ArmnnPreparedModel_1_3.hpp" +#endif + #include #include @@ -151,4 +156,12 @@ template class RequestThread; #endif +#ifdef ARMNN_ANDROID_NN_V1_3 +template class RequestThread; +template class RequestThread; +template class RequestThread; +template class RequestThread; +template class RequestThread; +#endif + } // namespace armnn_driver diff --git a/Utils.cpp b/Utils.cpp index c548f849..8a17b532 100644 --- a/Utils.cpp +++ b/Utils.cpp @@ -103,7 +103,7 @@ armnn::TensorInfo GetTensorInfoForOperand(const V1_0::Operand& operand) return ret; } -#ifdef ARMNN_ANDROID_NN_V1_2 // Using ::android::hardware::neuralnetworks::V1_2 +#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3)// Using ::android::hardware::neuralnetworks::V1_2 armnn::TensorInfo GetTensorInfoForOperand(const V1_2::Operand& operand) { @@ -164,13 +164,74 @@ armnn::TensorInfo GetTensorInfoForOperand(const V1_2::Operand& operand) #endif +#ifdef ARMNN_ANDROID_NN_V1_3 // Using ::android::hardware::neuralnetworks::V1_3 + +armnn::TensorInfo GetTensorInfoForOperand(const V1_3::Operand& operand) +{ + using namespace armnn; + bool perChannel = false; + + DataType type; + switch (operand.type) + { + case V1_3::OperandType::TENSOR_FLOAT32: + type = armnn::DataType::Float32; + break; + case V1_3::OperandType::TENSOR_FLOAT16: + type = armnn::DataType::Float16; + break; + case V1_3::OperandType::TENSOR_QUANT8_ASYMM: + type = armnn::DataType::QAsymmU8; + break; + case V1_3::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL: + perChannel=true; + ARMNN_FALLTHROUGH; + case V1_3::OperandType::TENSOR_QUANT8_SYMM: + type = armnn::DataType::QSymmS8; + break; + case V1_3::OperandType::TENSOR_QUANT16_SYMM: + type = armnn::DataType::QSymmS16; + break; + case V1_3::OperandType::TENSOR_INT32: + type = armnn::DataType::Signed32; + break; + case V1_3::OperandType::TENSOR_QUANT8_ASYMM_SIGNED: + type = armnn::DataType::QAsymmS8; + break; + default: + throw UnsupportedOperand(operand.type); + } + + TensorInfo ret(operand.dimensions.size(), operand.dimensions.data(), type); + if (perChannel) + { + // ExtraParams is expected to be of type channelQuant + BOOST_ASSERT(operand.extraParams.getDiscriminator() == + V1_3::Operand::ExtraParams::hidl_discriminator::channelQuant); + + auto perAxisQuantParams = operand.extraParams.channelQuant(); + + ret.SetQuantizationScales(perAxisQuantParams.scales); + ret.SetQuantizationDim(MakeOptional(perAxisQuantParams.channelDim)); + } + else + { + ret.SetQuantizationScale(operand.scale); + ret.SetQuantizationOffset(operand.zeroPoint); + } + + return ret; +} + 
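// Usage sketch for the per-channel branch above (hypothetical values, assuming the standard
// armnn::TensorInfo per-axis accessors): a TENSOR_QUANT8_SYMM_PER_CHANNEL weight operand with
// channelDim 0 yields a QSymmS8 TensorInfo where
//     info.HasPerAxisQuantization()     == true
//     info.GetQuantizationScales()      == the per-channel scales, e.g. {0.1f, 0.2f}
//     info.GetQuantizationDim().value() == 0
// while every other operand type carries a single scale/offset pair.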
+#endif + std::string GetOperandSummary(const V1_0::Operand& operand) { return android::hardware::details::arrayToString(operand.dimensions, operand.dimensions.size()) + " " + toString(operand.type); } -#ifdef ARMNN_ANDROID_NN_V1_2 // Using ::android::hardware::neuralnetworks::V1_2 +#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3) // Using ::android::hardware::neuralnetworks::V1_2 std::string GetOperandSummary(const V1_2::Operand& operand) { @@ -180,6 +241,16 @@ std::string GetOperandSummary(const V1_2::Operand& operand) #endif +#ifdef ARMNN_ANDROID_NN_V1_3 // Using ::android::hardware::neuralnetworks::V1_3 + +std::string GetOperandSummary(const V1_3::Operand& operand) +{ + return android::hardware::details::arrayToString(operand.dimensions, operand.dimensions.size()) + " " + + toString(operand.type); +} + +#endif + using DumpElementFunction = void (*)(const armnn::ConstTensor& tensor, unsigned int elementIndex, std::ofstream& fileStream); @@ -449,6 +520,27 @@ void RenameGraphDotFile(const std::string& oldName, const std::string& dumpDir, } } +void CommitPools(std::vector<::android::nn::RunTimePoolInfo>& memPools) +{ + if (memPools.empty()) + { + return; + } + // Commit output buffers. + // Note that we update *all* pools, even if they aren't actually used as outputs - + // this is simpler and is what the CpuExecutor does. + for (auto& pool : memPools) + { + // Type android::nn::RunTimePoolInfo has changed between Android P & Q and Android R, where + // update() has been removed and flush() added. +#if defined(ARMNN_ANDROID_R) // Use the new Android implementation. + pool.flush(); +#else + pool.update(); +#endif + } +} + } // namespace armnn_driver diff --git a/Utils.hpp b/Utils.hpp index 6256655f..b61ddb21 100644 --- a/Utils.hpp +++ b/Utils.hpp @@ -19,11 +19,16 @@ #include namespace V1_0 = ::android::hardware::neuralnetworks::V1_0; +namespace V1_1 = ::android::hardware::neuralnetworks::V1_1; -#ifdef ARMNN_ANDROID_NN_V1_2 // Using ::android::hardware::neuralnetworks::V1_2 +#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3) namespace V1_2 = ::android::hardware::neuralnetworks::V1_2; #endif +#ifdef ARMNN_ANDROID_NN_V1_3 +namespace V1_3 = ::android::hardware::neuralnetworks::V1_3; +#endif + namespace armnn_driver { @@ -31,6 +36,17 @@ namespace armnn_driver using DataLocation = ::android::nn::hal::DataLocation; #endif +inline const V1_0::Model& getMainModel(const V1_0::Model& model) { return model; } +inline const V1_1::Model& getMainModel(const V1_1::Model& model) { return model; } + +#if defined (ARMNN_ANDROID_NN_V1_2) || defined (ARMNN_ANDROID_NN_V1_3) +inline const V1_2::Model& getMainModel(const V1_2::Model& model) { return model; } +#endif + +#ifdef ARMNN_ANDROID_NN_V1_3 +inline const V1_3::Subgraph& getMainModel(const V1_3::Model& model) { return model.main; } +#endif + extern const armnn::PermutationVector g_DontPermute; template @@ -56,42 +72,53 @@ void* GetMemoryFromPool(DataLocation location, /// Can throw UnsupportedOperand armnn::TensorInfo GetTensorInfoForOperand(const V1_0::Operand& operand); -#ifdef ARMNN_ANDROID_NN_V1_2 // Using ::android::hardware::neuralnetworks::V1_2 +#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3) // Using ::android::hardware::neuralnetworks::V1_2 armnn::TensorInfo GetTensorInfoForOperand(const V1_2::Operand& operand); #endif +#ifdef ARMNN_ANDROID_NN_V1_3 // Using ::android::hardware::neuralnetworks::V1_3 +armnn::TensorInfo GetTensorInfoForOperand(const V1_3::Operand& operand); +#endif + 
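// A minimal sketch (an illustrative helper, not part of the driver sources) of how the
// getMainModel() overloads defined earlier in this header are intended to be used: template
// code that must work across HAL versions reads the graph through getMainModel(), which
// resolves to the model itself for HAL 1.0-1.2 and to the main subgraph for HAL 1.3.
template <typename HalModel>
auto GetMainOperationCount(const HalModel& model)
{
    return getMainModel(model).operations.size();
}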
std::string GetOperandSummary(const V1_0::Operand& operand); -#ifdef ARMNN_ANDROID_NN_V1_2 // Using ::android::hardware::neuralnetworks::V1_2 +#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3) // Using ::android::hardware::neuralnetworks::V1_2 std::string GetOperandSummary(const V1_2::Operand& operand); #endif +#ifdef ARMNN_ANDROID_NN_V1_3 // Using ::android::hardware::neuralnetworks::V1_3 +std::string GetOperandSummary(const V1_3::Operand& operand); +#endif + template std::string GetModelSummary(const HalModel& model) { std::stringstream result; - result << model.inputIndexes.size() << " input(s), " << model.operations.size() << " operation(s), " << - model.outputIndexes.size() << " output(s), " << model.operands.size() << " operand(s)" << std::endl; + result << getMainModel(model).inputIndexes.size() << " input(s), " + << getMainModel(model).operations.size() << " operation(s), " + << getMainModel(model).outputIndexes.size() << " output(s), " + << getMainModel(model).operands.size() << " operand(s) " + << std::endl; result << "Inputs: "; - for (uint32_t i = 0; i < model.inputIndexes.size(); i++) + for (uint32_t i = 0; i < getMainModel(model).inputIndexes.size(); i++) { - result << GetOperandSummary(model.operands[model.inputIndexes[i]]) << ", "; + result << GetOperandSummary(getMainModel(model).operands[getMainModel(model).inputIndexes[i]]) << ", "; } result << std::endl; result << "Operations: "; - for (uint32_t i = 0; i < model.operations.size(); i++) + for (uint32_t i = 0; i < getMainModel(model).operations.size(); i++) { - result << toString(model.operations[i].type).c_str() << ", "; + result << toString(getMainModel(model).operations[i].type).c_str() << ", "; } result << std::endl; result << "Outputs: "; - for (uint32_t i = 0; i < model.outputIndexes.size(); i++) + for (uint32_t i = 0; i < getMainModel(model).outputIndexes.size(); i++) { - result << GetOperandSummary(model.operands[model.outputIndexes[i]]) << ", "; + result << GetOperandSummary(getMainModel(model).operands[getMainModel(model).outputIndexes[i]]) << ", "; } result << std::endl; @@ -118,4 +145,29 @@ bool IsDynamicTensor(const armnn::TensorInfo& outputInfo); std::string GetFileTimestamp(); +#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3) +inline V1_2::OutputShape ComputeShape(const armnn::TensorInfo& info) +{ + V1_2::OutputShape shape; + + android::hardware::hidl_vec dimensions; + + armnn::TensorShape tensorShape = info.GetShape(); + const unsigned int numDims = tensorShape.GetNumDimensions(); + dimensions.resize(numDims); + + for (unsigned int outputIdx = 0u; outputIdx < numDims; ++outputIdx) + { + dimensions[outputIdx] = tensorShape[outputIdx]; + } + + shape.dimensions = dimensions; + shape.isSufficient = true; + + return shape; +} +#endif + +void CommitPools(std::vector<::android::nn::RunTimePoolInfo>& memPools); + } // namespace armnn_driver diff --git a/android.hardware.neuralnetworks@1.3-service-armnn.rc b/android.hardware.neuralnetworks@1.3-service-armnn.rc new file mode 100644 index 00000000..3f84d9c8 --- /dev/null +++ b/android.hardware.neuralnetworks@1.3-service-armnn.rc @@ -0,0 +1,4 @@ +service neuralnetworks_hal_service_armnn /vendor/bin/hw/android.hardware.neuralnetworks@1.3-service-armnn + class hal + user system + group system diff --git a/test/Convolution2D.hpp b/test/Convolution2D.hpp index 002677fe..38216f10 100644 --- a/test/Convolution2D.hpp +++ b/test/Convolution2D.hpp @@ -32,9 +32,7 @@ namespace driverTestHelpers void SetModelFp16Flag(V1_0::Model& model, bool 
fp16Enabled); -#if defined(ARMNN_ANDROID_NN_V1_1) || defined(ARMNN_ANDROID_NN_V1_2) void SetModelFp16Flag(V1_1::Model& model, bool fp16Enabled); -#endif template<typename HalPolicy> void PaddingTestImpl(android::nn::PaddingScheme paddingScheme, bool fp16Enabled = false) -- cgit v1.2.1
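
The sketch below is illustrative only and not part of the patch. It shows how the getMainModel() overloads added to Utils.hpp are intended to be consumed by version-agnostic template code such as GetModelSummary(): for V1_0 through V1_2 models the overloads simply return the model itself, while for a V1_3 model they return the main subgraph (model.main). The helper name CountOperations is hypothetical and the sketch assumes the Utils.hpp from this change is on the include path.

    #include <sstream>
    #include <string>

    #include "Utils.hpp" // provides the getMainModel() overloads added by this change

    namespace armnn_driver
    {

    // Hypothetical helper: the same template body works for V1_0, V1_1, V1_2 and V1_3
    // models because getMainModel() hides where the operand/operation tables live for
    // each HAL version (the model itself, or model.main for HAL 1.3).
    template <typename HalModel>
    std::string CountOperations(const HalModel& model)
    {
        std::stringstream result;
        result << getMainModel(model).operations.size() << " operation(s), "
               << getMainModel(model).operands.size()   << " operand(s) in the main subgraph";
        return result.str();
    }

    } // namespace armnn_driver

Routing every access through getMainModel(), rather than scattering #ifdef ARMNN_ANDROID_NN_V1_3 checks through each helper, keeps a single template body per function and confines the V1_3 main-subgraph difference to one overload, which is the approach the patched GetModelSummary() takes.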