aboutsummaryrefslogtreecommitdiff
path: root/1.2
diff options
context:
space:
mode:
author Pablo Tello <pablo.tello@arm.com> 2019-10-18 16:51:57 +0100
committer Matteo Martincigh <matteo.martincigh@arm.com> 2019-11-07 09:22:36 +0000
commit fb45e2f86a6c6ba7ff08554c872c8876820f0a7f (patch)
tree efcf13e9d348663607ade87e97c609093f300c1b /1.2
parent 9f0693b41a33d4d17ef016d8a5490cc65a8cfb8a (diff)
download android-nn-driver-fb45e2f86a6c6ba7ff08554c872c8876820f0a7f.tar.gz
MLCE-133 Fixed ASR hero use-case
* Added workaround in FC to deal with non-const weights
* Added workaround in LSTM to deal with non-const weights

Signed-off-by: Pablo Tello <pablo.tello@arm.com>
Signed-off-by: Matteo Martincigh <matteo.martincigh@arm.com>
Change-Id: I854eea6a74a6959606ff25b52a0ed80b3e0a18ab
Diffstat (limited to '1.2')
-rw-r--r-- 1.2/ArmnnDriverImpl.cpp 12
-rw-r--r-- 1.2/HalPolicy.cpp 82
2 files changed, 31 insertions, 63 deletions
diff --git a/1.2/ArmnnDriverImpl.cpp b/1.2/ArmnnDriverImpl.cpp
index 8a444e5d..7309c2a1 100644
--- a/1.2/ArmnnDriverImpl.cpp
+++ b/1.2/ArmnnDriverImpl.cpp
@@ -38,6 +38,12 @@ const char *g_OperandTypeTensorQuant16SymmPerformanceExecTime =
const char *g_OperandTypeTensorQuant16SymmPerformancePowerUsage =
"Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage";
+const char *g_OperandTypeTensorQuant8SymmPerformanceExecTime =
+ "Armnn.operandTypeTensorQuant8SymmPerformance.execTime";
+const char *g_OperandTypeTensorQuant8SymmPerformancePowerUsage =
+ "Armnn.operandTypeTensorQuant8SymmPerformance.powerUsage";
+
+
const char *g_OperandTypeTensorInt32PerformanceExecTime = "Armnn.operandTypeTensorInt32Performance.execTime";
const char *g_OperandTypeTensorInt32PerformancePowerUsage = "Armnn.operandTypeTensorInt32Performance.powerUsage";
@@ -256,6 +262,12 @@ Return<void> ArmnnDriverImpl::getCapabilities_1_2(const armnn::IRuntimePtr& runt
.powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformancePowerUsage, defaultValue)
});
+ update(&capabilities.operandPerformance, OperandType::TENSOR_QUANT8_SYMM,
+ {
+ .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformanceExecTime, defaultValue),
+ .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformancePowerUsage, defaultValue)
+ });
+
update(&capabilities.operandPerformance, OperandType::TENSOR_QUANT16_SYMM,
{
.execTime = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformanceExecTime, defaultValue),
diff --git a/1.2/HalPolicy.cpp b/1.2/HalPolicy.cpp
index 7e9a2233..5d6274fc 100644
--- a/1.2/HalPolicy.cpp
+++ b/1.2/HalPolicy.cpp
@@ -2010,6 +2010,8 @@ bool HalPolicy::ConvertTanH(const Operation& operation, const Model& model, Conv
bool HalPolicy::ConvertLstm(const Operation& operation, const Model& model, ConversionData& data)
{
+ ALOGV("hal_1_2::HalPolicy::ConvertLstm()");
+
// Inputs:
// 00: The input: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, input_size], where
// “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input.
@@ -2035,27 +2037,27 @@ bool HalPolicy::ConvertLstm(const Operation& operation, const Model& model, Conv
// 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
// [num_units, input_size].
const ConstTensorPin inputToForgetWeightsPin =
- ConvertOperationInputToConstTensorPin<hal_1_2::HalPolicy>(operation, 2, model, data);
+ (DequantizeAndMakeConstTensorPin<hal_1_2::HalPolicy>(operation, model, data, 2));
// 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
// [num_units, input_size].
const ConstTensorPin inputToCellWeightsPin =
- ConvertOperationInputToConstTensorPin<hal_1_2::HalPolicy>(operation, 3, model, data);
+ (DequantizeAndMakeConstTensorPin<hal_1_2::HalPolicy>(operation, model, data, 3));
// 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
// [num_units, input_size].
const ConstTensorPin inputToOutputWeightsPin =
- ConvertOperationInputToConstTensorPin<hal_1_2::HalPolicy>(operation, 4, model, data);
+ (DequantizeAndMakeConstTensorPin<hal_1_2::HalPolicy>(operation, model, data, 4));
// 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
// [num_units, output_size].
const ConstTensorPin recurrentToForgetWeightsPin =
- ConvertOperationInputToConstTensorPin<hal_1_2::HalPolicy>(operation, 6, model, data);
+ (DequantizeAndMakeConstTensorPin<hal_1_2::HalPolicy>(operation, model, data, 6));
// 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
// [num_units, output_size].
const ConstTensorPin recurrentToCellWeightsPin =
- ConvertOperationInputToConstTensorPin<hal_1_2::HalPolicy>(operation, 7, model, data);
+ (DequantizeAndMakeConstTensorPin<hal_1_2::HalPolicy>(operation, model, data, 7));
// 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
// [num_units, output_size].
const ConstTensorPin recurrentToOutputWeightsPin =
- ConvertOperationInputToConstTensorPin<hal_1_2::HalPolicy>(operation, 8, model, data);
+ (DequantizeAndMakeConstTensorPin<hal_1_2::HalPolicy>(operation, model, data, 8));
// 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
const ConstTensorPin forgetGateBiasPin =
ConvertOperationInputToConstTensorPin<hal_1_2::HalPolicy>(operation, 13, model, data);
@@ -2083,56 +2085,21 @@ bool HalPolicy::ConvertLstm(const Operation& operation, const Model& model, Conv
// 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
// [num_units, input_size], where “num_units” corresponds to the number of cell units.
const ConstTensorPin inputToInputWeightsPin =
- ConvertOperationInputToConstTensorPin<hal_1_2::HalPolicy>(operation,
- 1,
- model,
- data,
- g_DontPermute,
- nullptr,
- true);
-
+ (DequantizeAndMakeConstTensorPin<hal_1_2::HalPolicy>(operation, model, data, 1, true));
// 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
// [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e.,
// “num_units”), or the second dimension of the “projection_weights”, if defined.
const ConstTensorPin recurrentToInputWeightsPin =
- ConvertOperationInputToConstTensorPin<hal_1_2::HalPolicy>(operation,
- 5,
- model,
- data,
- g_DontPermute,
- nullptr,
- true);
-
+ (DequantizeAndMakeConstTensorPin<hal_1_2::HalPolicy>(operation, model, data, 5, true));
// 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
const ConstTensorPin cellToInputWeightsPin =
- ConvertOperationInputToConstTensorPin<hal_1_2::HalPolicy>(operation,
- 9,
- model,
- data,
- g_DontPermute,
- nullptr,
- true);
-
+ (DequantizeAndMakeConstTensorPin<hal_1_2::HalPolicy>(operation, model, data, 9, true));
// 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
const ConstTensorPin cellToForgetWeightsPin =
- ConvertOperationInputToConstTensorPin<hal_1_2::HalPolicy>(operation,
- 10,
- model,
- data,
- g_DontPermute,
- nullptr,
- true);
-
+ (DequantizeAndMakeConstTensorPin<hal_1_2::HalPolicy>(operation, model, data, 10, true));
// 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
const ConstTensorPin cellToOutputWeightsPin =
- ConvertOperationInputToConstTensorPin<hal_1_2::HalPolicy>(operation,
- 11,
- model,
- data,
- g_DontPermute,
- nullptr,
- true);
-
+ (DequantizeAndMakeConstTensorPin<hal_1_2::HalPolicy>(operation, model, data, 11, true));
// 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
const ConstTensorPin inputGateBiasPin =
ConvertOperationInputToConstTensorPin<hal_1_2::HalPolicy>(operation,
@@ -2146,14 +2113,7 @@ bool HalPolicy::ConvertLstm(const Operation& operation, const Model& model, Conv
// 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
// [output_size, num_units].
const ConstTensorPin projectionWeightsPin =
- ConvertOperationInputToConstTensorPin<hal_1_2::HalPolicy>(operation,
- 16,
- model,
- data,
- g_DontPermute,
- nullptr,
- true);
-
+ (DequantizeAndMakeConstTensorPin<hal_1_2::HalPolicy>(operation, model, data, 16, true));
// 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size].
const ConstTensorPin projectionBiasPin =
ConvertOperationInputToConstTensorPin<hal_1_2::HalPolicy>(operation,
@@ -2196,14 +2156,8 @@ bool HalPolicy::ConvertLstm(const Operation& operation, const Model& model, Conv
// Get the normalization tensors
// 23: The input layer normalization weights. A 1-D tensor of shape [num_units].
// Used to rescale normalized inputs to activation at input gate.
- const ConstTensorPin inputLayerNormWeightsPin =
- ConvertOperationInputToConstTensorPin<hal_1_2::HalPolicy>(operation,
- 23,
- model,
- data,
- g_DontPermute,
- nullptr,
- true);
+ const ConstTensorPin inputLayerNormWeightsPin
+ (DequantizeAndMakeConstTensorPin<hal_1_2::HalPolicy>(operation, model, data, 23, true));
// 24: The forget layer normalization weights. A 1-D tensor of shape [num_units].
// Used to rescale normalized inputs to activation at forget gate.
@@ -2357,7 +2311,9 @@ bool HalPolicy::ConvertLstm(const Operation& operation, const Model& model, Conv
IsDynamicTensor(cellStateOutInfo) ||
IsDynamicTensor(outputInfo))
{
- return Fail("%s: Dynamic output tensors are not supported", __func__);
+ return Fail("%s: Dynamic output tensors are not supported %d %d %d %d", __func__,
+ IsDynamicTensor(scratchBufferInfo), IsDynamicTensor(outputStateOutInfo),
+ IsDynamicTensor(cellStateOutInfo), IsDynamicTensor(outputInfo));
}
// Basic parameters