From 7efb46d283eee8a9cf10f5329176b92b2f830ed6 Mon Sep 17 00:00:00 2001
From: Ellen Norris-Thompson
Date: Wed, 24 Jul 2019 17:39:19 +0100
Subject: IVGCVSW-3340 Add support for Quantized_LSTM to HAL1.2 Driver

* Added conversion method to HAL1.2 Policy

Signed-off-by: Ellen Norris-Thompson
Change-Id: Ida6734d34931148add2f3464c3283191ea34b712
---
 1.2/HalPolicy.cpp | 210 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1.2/HalPolicy.hpp |   2 +
 NnapiSupport.txt  |   2 +-
 3 files changed, 213 insertions(+), 1 deletion(-)

diff --git a/1.2/HalPolicy.cpp b/1.2/HalPolicy.cpp
index 477806ef..e084947e 100644
--- a/1.2/HalPolicy.cpp
+++ b/1.2/HalPolicy.cpp
@@ -149,6 +149,8 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
             return ConvertPrelu(operation, model, data);
         case V1_2::OperationType::QUANTIZE:
             return ConvertQuantize(operation, model, data);
+        case V1_2::OperationType::QUANTIZED_16BIT_LSTM:
+            return ConvertQuantizedLstm(operation, model, data);
         case V1_2::OperationType::RELU:
             return ConvertReLu(operation, model, data);
         case V1_2::OperationType::RELU1:
@@ -811,6 +813,214 @@ bool HalPolicy::ConvertQuantize(const Operation& operation, const Model& model,
     return SetupAndTrackLayerOutputSlot(operation, 0, *layer, model, data);
 }
 
+bool HalPolicy::ConvertQuantizedLstm(const Operation& operation, const Model& model, ConversionData& data)
+{
+    ALOGV("hal_1_2::HalPolicy::ConvertQuantizedLstm()");
+
+    // Inputs:
+    // 0: The input: A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape [numBatches, inputSize]
+    //    specifying the input to the LSTM cell. Tensor is quantized with a fixed quantization range of -1, 127/128.
+    LayerInputHandle input = ConvertToLayerInputHandle(operation, 0, model, data);
+    if (!input.IsValid())
+    {
+        return Fail("%s: Could not read input 0: input", __func__);
+    }
+
+    // 13: The previous cell state: A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT16_SYMM and shape
+    //     [numBatches, outputSize] specifying the cell state from the previous time step of the LSTM cell.
+    //     It is quantized using a quantization range of -2^4, 2^4 * 32767/32768.
+    LayerInputHandle previousCellStateIn = ConvertToLayerInputHandle(operation, 13, model, data);
+    if (!previousCellStateIn.IsValid())
+    {
+        return Fail("%s: Could not read input 13: previousCellStateIn", __func__);
+    }
+
+    // 14: The previous output state: A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape
+    //     [numBatches, outputSize] specifying the output of the LSTM cell from the previous time step. Tensor
+    //     is quantized with a fixed quantization range of -1, 127/128.
+    LayerInputHandle previousOutputIn = ConvertToLayerInputHandle(operation, 14, model, data);
+    if (!previousOutputIn.IsValid())
+    {
+        return Fail("%s: Could not read input 14: previousOutputIn", __func__);
+    }
+
+    // Get the input tensors:
+    // 1: The input-to-input weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape
+    //    [outputSize, inputSize] specifying input-to-input part of weights for fully-connected layer inside the
+    //    LSTM cell. Quantization zero point and scale must be the same across all the weights.
+    const ConstTensorPin inputToInputWeightsPin =
+        ConvertOperationInputToConstTensorPin(operation, 1, model, data);
+
+    // 2: The input-to-forget weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape
+    //    [outputSize, inputSize] specifying input-to-forget part of weights for fully-connected layer inside the
+    //    LSTM cell.
+    //    Quantization zero point and scale must be the same across all the weights.
+    const ConstTensorPin inputToForgetWeightsPin =
+        ConvertOperationInputToConstTensorPin(operation, 2, model, data);
+
+    // 3: The input-to-cell weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape
+    //    [outputSize, inputSize] specifying input-to-cell part of weights for fully-connected layer inside the
+    //    LSTM cell. Quantization zero point and scale must be the same across all the weights.
+    const ConstTensorPin inputToCellWeightsPin =
+        ConvertOperationInputToConstTensorPin(operation, 3, model, data);
+
+    // 4: The input-to-output weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape
+    //    [outputSize, inputSize] specifying input-to-output part of weights for fully-connected layer inside the
+    //    LSTM cell. Quantization zero point and scale must be the same across all the weights.
+    const ConstTensorPin inputToOutputWeightsPin =
+        ConvertOperationInputToConstTensorPin(operation, 4, model, data);
+
+    // 5: The recurrent-to-input weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape
+    //    [outputSize, outputSize] specifying recurrent-to-input part of weights for fully-connected layer inside
+    //    the LSTM cell. Quantization zero point and scale must be the same across all the weights.
+    const ConstTensorPin recurrentToInputWeightsPin =
+        ConvertOperationInputToConstTensorPin(operation, 5, model, data);
+
+    // 6: The recurrent-to-forget weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape
+    //    [outputSize, outputSize] specifying recurrent-to-forget part of weights for fully-connected layer inside
+    //    the LSTM cell. Quantization zero point and scale must be the same across all the weights.
+    const ConstTensorPin recurrentToForgetWeightsPin =
+        ConvertOperationInputToConstTensorPin(operation, 6, model, data);
+
+    // 7: The recurrent-to-cell weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape
+    //    [outputSize, outputSize] specifying recurrent-to-cell part of weights for fully-connected layer inside
+    //    the LSTM cell. Quantization zero point and scale must be the same across all the weights.
+    const ConstTensorPin recurrentToCellWeightsPin =
+        ConvertOperationInputToConstTensorPin(operation, 7, model, data);
+
+    // 8: The recurrent-to-output weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape
+    //    [outputSize, outputSize] specifying recurrent-to-output part of weights for fully-connected layer inside
+    //    the LSTM cell. Quantization zero point and scale must be the same across all the weights.
+    const ConstTensorPin recurrentToOutputWeightsPin =
+        ConvertOperationInputToConstTensorPin(operation, 8, model, data);
+
+    // 9: The input gate bias. A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32 and shape [outputSize] specifying
+    //    the bias for the fully-connected layer inside the LSTM cell. Bias is quantized with scale being a product
+    //    of input and weights scales and zeroPoint equal to 0.
+    const ConstTensorPin inputGateBiasPin =
+        ConvertOperationInputToConstTensorPin(operation, 9, model, data);
+
+    // 10: The forget gate bias. A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32 and shape [outputSize] specifying
+    //     the bias for the fully-connected layer inside the LSTM cell. Bias is quantized with scale being a product
+    //     of input and weights scales and zeroPoint equal to 0.
+    const ConstTensorPin forgetGateBiasPin =
+        ConvertOperationInputToConstTensorPin(operation, 10, model, data);
+
+    // 11: The cell bias. A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32 and shape [outputSize] specifying the
+    //     bias for the fully-connected layer inside the LSTM cell. Bias is quantized with scale being a product
+    //     of input and weights scales and zeroPoint equal to 0.
+    const ConstTensorPin cellBiasPin =
+        ConvertOperationInputToConstTensorPin(operation, 11, model, data);
+
+    // 12: The output gate bias. A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32 and shape [outputSize] specifying
+    //     the bias for the fully-connected layer inside the LSTM cell. Bias is quantized with scale being a product
+    //     of input and weights scales and zeroPoint equal to 0.
+    const ConstTensorPin outputGateBiasPin =
+        ConvertOperationInputToConstTensorPin(operation, 12, model, data);
+
+    if (!inputToInputWeightsPin.IsValid() ||
+        !inputToForgetWeightsPin.IsValid() ||
+        !inputToCellWeightsPin.IsValid() ||
+        !inputToOutputWeightsPin.IsValid() ||
+        !recurrentToInputWeightsPin.IsValid() ||
+        !recurrentToForgetWeightsPin.IsValid() ||
+        !recurrentToCellWeightsPin.IsValid() ||
+        !recurrentToOutputWeightsPin.IsValid() ||
+        !inputGateBiasPin.IsValid() ||
+        !forgetGateBiasPin.IsValid() ||
+        !cellBiasPin.IsValid() ||
+        !outputGateBiasPin.IsValid())
+    {
+        return Fail("%s: Operation has invalid tensor inputs", __func__);
+    }
+
+    // Outputs:
+    // 0: The cell state: A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT16_SYMM and shape [numBatches, outputSize]
+    //    which contains a cell state from the current time step. Tensor is quantized using a quantization range
+    //    of -2^4, 2^4 * 32767/32768.
+    const Operand* cellStateOut = GetOutputOperand(operation, 0, model);
+    if (!cellStateOut)
+    {
+        return Fail("%s: Could not read output 0: cellStateOut", __func__);
+    }
+
+    // 1: The output: A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape [numBatches, outputSize]
+    //    which contains the output value. Tensor is quantized with a fixed quantization range of -1, 127/128.
+    const Operand* output = GetOutputOperand(operation, 1, model);
+    if (!output)
+    {
+        return Fail("%s: Could not read output 1: output", __func__);
+    }
+
+    // Inputs
+    const armnn::TensorInfo& inputInfo = input.GetTensorInfo();
+    const armnn::TensorInfo& previousCellStateInInfo = previousCellStateIn.GetTensorInfo();
+    const armnn::TensorInfo& previousOutputInInfo = previousOutputIn.GetTensorInfo();
+
+    // Outputs
+    const armnn::TensorInfo& cellStateOutInfo = GetTensorInfoForOperand(*cellStateOut);
+    const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
+
+    // Dynamic tensors currently not supported
+    if (IsDynamicTensor(cellStateOutInfo) || IsDynamicTensor(outputInfo))
+    {
+        return Fail("%s: Dynamic output tensors are not supported", __func__);
+    }
+
+    armnn::QuantizedLstmInputParams params;
+
+    params.m_InputToInputWeights = inputToInputWeightsPin.GetConstTensorPtr();
+    params.m_InputToForgetWeights = inputToForgetWeightsPin.GetConstTensorPtr();
+    params.m_InputToCellWeights = inputToCellWeightsPin.GetConstTensorPtr();
+    params.m_InputToOutputWeights = inputToOutputWeightsPin.GetConstTensorPtr();
+    params.m_RecurrentToInputWeights = recurrentToInputWeightsPin.GetConstTensorPtr();
+    params.m_RecurrentToForgetWeights = recurrentToForgetWeightsPin.GetConstTensorPtr();
+    params.m_RecurrentToCellWeights = recurrentToCellWeightsPin.GetConstTensorPtr();
+    params.m_RecurrentToOutputWeights = recurrentToOutputWeightsPin.GetConstTensorPtr();
+    params.m_InputGateBias = inputGateBiasPin.GetConstTensorPtr();
+    params.m_ForgetGateBias = forgetGateBiasPin.GetConstTensorPtr();
+    params.m_CellBias = cellBiasPin.GetConstTensorPtr();
+    params.m_OutputGateBias = outputGateBiasPin.GetConstTensorPtr();
+
+    armnn::QuantizedLstmInputParamsInfo paramsInfo;
+    paramsInfo.m_InputToInputWeights = &(params.m_InputToInputWeights->GetInfo());
+    paramsInfo.m_InputToForgetWeights = &(params.m_InputToForgetWeights->GetInfo());
+    paramsInfo.m_InputToCellWeights = &(params.m_InputToCellWeights->GetInfo());
+    paramsInfo.m_InputToOutputWeights = &(params.m_InputToOutputWeights->GetInfo());
+    paramsInfo.m_RecurrentToInputWeights = &(params.m_RecurrentToInputWeights->GetInfo());
+    paramsInfo.m_RecurrentToForgetWeights = &(params.m_RecurrentToForgetWeights->GetInfo());
+    paramsInfo.m_RecurrentToCellWeights = &(params.m_RecurrentToCellWeights->GetInfo());
+    paramsInfo.m_RecurrentToOutputWeights = &(params.m_RecurrentToOutputWeights->GetInfo());
+    paramsInfo.m_InputGateBias = &(params.m_InputGateBias->GetInfo());
+    paramsInfo.m_ForgetGateBias = &(params.m_ForgetGateBias->GetInfo());
+    paramsInfo.m_CellBias = &(params.m_CellBias->GetInfo());
+    paramsInfo.m_OutputGateBias = &(params.m_OutputGateBias->GetInfo());
+
+    bool isSupported = false;
+    FORWARD_LAYER_SUPPORT_FUNC(__func__,
+                               IsQuantizedLstmSupported,
+                               data.m_Backends,
+                               isSupported,
+                               inputInfo,
+                               previousCellStateInInfo,
+                               previousOutputInInfo,
+                               cellStateOutInfo,
+                               outputInfo,
+                               paramsInfo);
+
+    if (!isSupported)
+    {
+        return false;
+    }
+
+    armnn::IConnectableLayer* const layer = data.m_Network->AddQuantizedLstmLayer(params, "QuantizedLstm");
+    input.Connect(layer->GetInputSlot(0));
+    previousCellStateIn.Connect(layer->GetInputSlot(1));
+    previousOutputIn.Connect(layer->GetInputSlot(2));
+
+    return (SetupAndTrackLayerOutputSlot(operation, 0, *layer, 0, model, data) &&
+            SetupAndTrackLayerOutputSlot(operation, 1, *layer, 1, model, data));
+}
+
 bool HalPolicy::ConvertReLu(const Operation& operation, const Model& model, ConversionData& data)
 {
ALOGV("hal_1_2::HalPolicy::ConvertReLu()"); diff --git a/1.2/HalPolicy.hpp b/1.2/HalPolicy.hpp index 4b1ab073..a268b3de 100644 --- a/1.2/HalPolicy.hpp +++ b/1.2/HalPolicy.hpp @@ -57,6 +57,8 @@ private: static bool ConvertQuantize(const Operation& operation, const Model& model, ConversionData& data); + static bool ConvertQuantizedLstm(const Operation& operation, const Model& model, ConversionData& data); + static bool ConvertReLu(const Operation& operation, const Model& model, ConversionData& data); static bool ConvertReLu1(const Operation& operation, const Model& model, ConversionData& data); diff --git a/NnapiSupport.txt b/NnapiSupport.txt index 4a494305..6f740970 100644 --- a/NnapiSupport.txt +++ b/NnapiSupport.txt @@ -55,6 +55,7 @@ MINIMUM (FLOAT32,QUANT8_ASYMM) PAD_V2 (FLOAT32,QUANT8_ASYMM) PRELU (FLOAT32,QUANT8_ASYMM) QUANTIZE (FLOAT32,QUANT8_ASYMM) +QUANTIZED_16BIT_LSTM (QUANT8_ASYMM) RESIZE_NEAREST_NEIGHBOR (FLOAT32,QUANT8_ASYMM) SOFTMAX (FLOAT32,QUANT8_ASYMM) TRANSPOSE_CONV_2D (FLOAT32,QUANT8_ASYMM) @@ -74,7 +75,6 @@ The following AndroidNN HAL 1.2 operations are currently not supported: CONCATENATION LSTM -QUANTIZED_16BIT_LSTM Where operations are not supported by the ArmNN Android NN Driver, the driver indicates this to the framework appropriately and the framework implements those operations using a CPU implementation. -- cgit v1.2.1