author      Patrik Gustavsson <patrik.gustavsson@arm.com>  2021-09-14 14:56:48 +0200
committer   Patrik Gustavsson <patrik.gustavsson@arm.com>  2021-09-15 13:21:16 +0200
commit      f436ada9caea87ec2dd686a92e41a15c1dcdeb1d (patch)
tree        5d3c136de06e0ee54833d2a379eb48cfd12ccd75
parent      0957e3ef4b94f17efb67429c88bab8ba650f78e8 (diff)
download    ethos-u-vela-f436ada9caea87ec2dd686a92e41a15c1dcdeb1d.tar.gz
TOSA: Support for TABLE operator (int8)
Added support to map the TABLE operator to LUT.

Limitations:
 -Only supported for int8
 -TABLE input must be constant

This also adds support for the TFLite legalisation of Tanh/Sigmoid (int8/uint8).

Signed-off-by: Patrik Gustavsson <patrik.gustavsson@arm.com>
Change-Id: I1a95f61fb02fdd42c4a690494418cc0765c8b275
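For reference, an int8 TABLE lookup of this kind is a 256-entry indexed read, with the int8 input shifted into the index range 0..255. A minimal numpy sketch of the semantics (the tanh-shaped table below is hypothetical, not taken from the commit):

import numpy as np

# Hypothetical 256-entry int8 table: index 0 corresponds to input -128,
# index 255 to input 127; here filled with a quantised tanh-like curve.
table = np.clip(np.round(np.tanh((np.arange(256) - 128) / 32.0) * 127), -128, 127).astype(np.int8)

def table_lookup_int8(ifm, table):
    # Shift the int8 input range [-128, 127] into the index range [0, 255].
    return table[ifm.astype(np.int32) + 128]

x = np.array([-128, -1, 0, 1, 127], dtype=np.int8)
print(table_lookup_int8(x, table))  # element-wise lookup, result dtype int8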
-rw-r--r--ethosu/vela/graph_optimiser_util.py31
-rw-r--r--ethosu/vela/operation.py1
-rw-r--r--ethosu/vela/tflite_graph_optimiser.py30
-rw-r--r--ethosu/vela/tosa_graph_optimiser.py22
-rw-r--r--ethosu/vela/tosa_mapping.py4
-rw-r--r--ethosu/vela/tosa_supported_operators.py24
6 files changed, 76 insertions, 36 deletions
diff --git a/ethosu/vela/graph_optimiser_util.py b/ethosu/vela/graph_optimiser_util.py
index dafd2849..d2d3d833 100644
--- a/ethosu/vela/graph_optimiser_util.py
+++ b/ethosu/vela/graph_optimiser_util.py
@@ -19,6 +19,7 @@ from typing import Tuple
import numpy as np
+from . import lut
from .data_type import DataType
from .debug_database import DebugDatabase
from .errors import UnsupportedFeatureError
@@ -26,6 +27,8 @@ from .errors import VelaError
from .operation import Op
from .operation_util import create_avgpool_nop
from .shape4d import Shape4D
+from .tensor import create_const_tensor
+from .tensor import QuantizationParameters
memory_only_ops = (
Op.Reshape,
@@ -320,3 +323,31 @@ def convert_depthwise_to_conv(op, arch, nng):
)
DebugDatabase.add_optimised(op, op)
return op
+
+
+def convert_to_lut(op, lut_values, lut_name):
+ # Rewrite the operation by Add with scalar 0 + LUT activation
+ ifm = op.inputs[0]
+ if ifm is None:
+ return op
+ assert ifm.dtype.size_in_bytes() == 1
+ op.type = Op.Add
+ op.name = op.name + "_lut_" + lut_name
+ # Mark as no-op to enable potential fusing optimizations
+ op.attrs["is_nop"] = True
+ # Create an input tensor containing scalar zero
+ quantization = QuantizationParameters(0.0, 255.0)
+ quantization.scale_f32 = ifm.quantization.scale_f32
+ quantization.zero_point = 0
+ tens = create_const_tensor(op.inputs[0].name + "_scalar0", [], ifm.dtype, [0], np.uint8, quantization=quantization)
+ op.add_input_tensor(tens)
+ op.ifm_shapes.append(Shape4D(tens.shape)) # TODO no shape?
+
+ # The LUT must be applied without any preceding rescaling (the LUT itself performs the rescale),
+ # so even if the OFM has a different scale than the IFM, the generated OFM scale instructions
+ # should be the same as the IFM
+ op.forced_output_quantization = ifm.quantization
+ lut_tensor = lut.create_lut_tensor(op.name + "_values", lut_values, DataType.int8)
+ op.set_activation_lut(lut_tensor)
+ op.set_ifm_ofm_shapes()
+ return op
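The shared helper above takes a plain list or array of 256 table values plus a name suffix. A hedged sketch of a caller, assuming `op` is an already-constructed int8 elementwise operation whose inputs[0] is the IFM (names below are illustrative only):

# Sketch only: `op` is assumed to be an existing int8 op with its IFM attached.
identity_values = list(range(-128, 128))  # hypothetical 256-entry identity table
op = convert_to_lut(op, identity_values, "identity")
# op.type is now Op.Add with a scalar-zero second input; the table is attached
# as the activation LUT and the OFM is forced to keep the IFM quantisation.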
diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py
index e9d364ea..1558b943 100644
--- a/ethosu/vela/operation.py
+++ b/ethosu/vela/operation.py
@@ -281,6 +281,7 @@ class Op(Enum):
SubgraphInput = OperatorInfo() # Only used in CPU subgraphs
Sum = OperatorInfo()
Svdf = OperatorInfo()
+ Table = OperatorInfo(indices=NNG_IFM_INDICES)
Tanh = OperatorInfo(indices=NNG_IFM_INDICES)
Tile = OperatorInfo()
TopKV2 = OperatorInfo()
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index b48cc7af..cf211de4 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -22,7 +22,6 @@ import uuid
import numpy as np
from . import fp_math
-from . import lut
from . import rewrite_graph
from . import scaling
from .api import NpuRoundingMode
@@ -33,6 +32,7 @@ from .ethos_u55_regs.ethos_u55_regs import resampling_mode
from .graph_optimiser_util import bypass_memory_only_ops
from .graph_optimiser_util import calc_explicit_padding
from .graph_optimiser_util import convert_depthwise_to_conv
+from .graph_optimiser_util import convert_to_lut
from .graph_optimiser_util import fix_sg_input_output
from .graph_optimiser_util import memory_only_ops
from .graph_optimiser_util import move_splitsliceread_to_consumer
@@ -858,34 +858,6 @@ def convert_lrelu_to_mul_max(op, arch):
return op
-def convert_to_lut(op, lut_values, lut_name):
- # Rewrite the operation by Add with scalar 0 + LUT activation
- ifm = op.inputs[0]
- if ifm is None:
- return op
- assert ifm.dtype.size_in_bytes() == 1
- op.type = Op.Add
- op.name = op.name + "_lut_" + lut_name
- # Mark as no-op to enable potential fusing optimizations
- op.attrs["is_nop"] = True
- # Create an input tensor containing scalar zero
- quantization = QuantizationParameters(0.0, 255.0)
- quantization.scale_f32 = ifm.quantization.scale_f32
- quantization.zero_point = 0
- tens = create_const_tensor(op.inputs[0].name + "_scalar0", [], ifm.dtype, [0], np.uint8, quantization=quantization)
- op.add_input_tensor(tens)
- op.ifm_shapes.append(Shape4D(tens.shape))
-
- # The LUT must be applied without any preceding rescaling (the LUT itself performs the rescale),
- # so even if the OFM has a different scale than the IFM, the generated OFM scale instructions
- # should be the same as the IFM
- op.forced_output_quantization = ifm.quantization
- lut_tensor = lut.create_lut_tensor(op.name + "_values", lut_values, DataType.int8)
- op.set_activation_lut(lut_tensor)
- op.set_ifm_ofm_shapes()
- return op
-
-
def convert_to_lut8(op, fn, fn_name):
# Converts op to a no-op + int8/uint8 LUT which is generated with the given function.
# fn is a function(real) -> real
diff --git a/ethosu/vela/tosa_graph_optimiser.py b/ethosu/vela/tosa_graph_optimiser.py
index a298ddbb..1ef04449 100644
--- a/ethosu/vela/tosa_graph_optimiser.py
+++ b/ethosu/vela/tosa_graph_optimiser.py
@@ -24,6 +24,7 @@ from .debug_database import DebugDatabase
from .graph_optimiser_util import bypass_memory_only_ops
from .graph_optimiser_util import calc_explicit_padding
from .graph_optimiser_util import convert_depthwise_to_conv
+from .graph_optimiser_util import convert_to_lut
from .graph_optimiser_util import move_splitsliceread_to_consumer
from .graph_optimiser_util import needed_total_padding
from .graph_optimiser_util import set_ifm_ofm_op_shapes
@@ -490,13 +491,26 @@ def convert_pad(op, arch, nng):
return add_op
+def convert_table_to_lut(op, arch, nng):
+ # Converts table op to a no-op + LUT
+ if op.type is not Op.Table:
+ return op
+
+ table = op.inputs[1]
+ op.inputs.remove(table)
+ op.set_ifm_ofm_shapes()
+
+ return convert_to_lut(op, table.values, "table")
+
+
def fixup_quantization(op, arch, nng):
if op.ifm and op.ifm.quantization.zero_point is None:
op.ifm.quantization.zero_point = 0
if op.ifm2 and op.ifm2.quantization.zero_point is None:
- op.ifm.quantization.zero_point = 0
- if op.ofm and op.ofm.quantization.zero_point is None:
- op.ofm.quantization.zero_point = 0
+ op.ifm2.quantization.zero_point = 0
+ if not op.forced_output_quantization:
+ if op.ofm and op.ofm.quantization and op.ofm.quantization.zero_point is None:
+ op.ofm.quantization.zero_point = 0
return op
@@ -547,7 +561,7 @@ def tosa_optimise_graph(nng, arch):
)
# Rewrite Operators step
- op_rewrite_list = [set_tensor_equivalence, rewrite_rescale, convert_depthwise_to_conv]
+ op_rewrite_list = [set_tensor_equivalence, rewrite_rescale, convert_depthwise_to_conv, convert_table_to_lut]
for idx, sg in enumerate(nng.subgraphs):
nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
diff --git a/ethosu/vela/tosa_mapping.py b/ethosu/vela/tosa_mapping.py
index ebbaa0a1..f80a9156 100644
--- a/ethosu/vela/tosa_mapping.py
+++ b/ethosu/vela/tosa_mapping.py
@@ -196,7 +196,6 @@ unsupported_tosa_operators = {
TosaOp.MAXIMUM,
TosaOp.MINIMUM,
TosaOp.POW,
- TosaOp.TABLE,
TosaOp.ABS,
TosaOp.BITWISE_NOT,
TosaOp.CEIL,
@@ -274,7 +273,8 @@ tosa_operator_map = {
TosaOp.MUL: (Op.Mul, mul_attrs, None, TOSA_IFM_IFM2_INDICES),
# TODO TosaOp.POW
TosaOp.SUB: (Op.Sub, None, None, TOSA_IFM_IFM2_INDICES),
- # TODO TosaOp.TABLE
+ # TODO is table content in input[1] always constant?
+ TosaOp.TABLE: (Op.Table, None, None, TOSA_IFM_INDICES),
# TODO TosaOp.ABS
# TODO TosaOp.BITWISE_NOT
# TODO TosaOp.CEIL
diff --git a/ethosu/vela/tosa_supported_operators.py b/ethosu/vela/tosa_supported_operators.py
index a4f822eb..98df27e3 100644
--- a/ethosu/vela/tosa_supported_operators.py
+++ b/ethosu/vela/tosa_supported_operators.py
@@ -42,7 +42,7 @@ class TosaSupportedOperators:
binary_elem_wise_add_mul_sub = set((Op.Add, Op.Mul, Op.RescaleMul, Op.Sub,))
type_conversion_ops = set((Op.Rescale,))
relu_ops = set((Op.Clamp, Op.ReluN,))
- activation_ops = relu_ops
+ activation_ops = relu_ops | set((Op.Table,))
pad_ops = set((Op.Pad,))
npu_post_ops = activation_ops
@@ -68,6 +68,8 @@ class TosaSupportedOperators:
self.specific_constraints[Op.Transpose].append(TosaSupportedOperators.constraint_ifm_producer)
self.specific_constraints[Op.Pad].append(TosaSupportedOperators.constraint_padding_producer)
+ self.specific_constraints[Op.Table].append(TosaSupportedOperators.constraint_table_dtype)
+ self.specific_constraints[Op.Table].append(TosaSupportedOperators.constraint_table_producer)
# Depthwise Conv specific checks:
for op_type in TosaSupportedOperators.depthwise_convolution_ops:
@@ -200,3 +202,23 @@ class TosaSupportedOperators:
)
return valid, extra
return True, "Op has depth_multiplier=1"
+
+ # TODO Table operator support limited to int8 for now.
+ # For TFLite it is assumed to be constant.
+ @staticmethod
+ def constraint_table_dtype(op):
+ "Only supported is int8"
+ valid = True
+ tensors = [op.ifm, op.ofm, op.inputs[1]]
+ for tens in tensors:
+ if tens.dtype != DataType.int8:
+ valid = False
+ return valid, "Table operator with non int8 tensor"
+
+ # TODO limit table to be constant data for now.
+ # Can it be non-constant?
+ @staticmethod
+ def constraint_table_producer(op):
+ "Input must be constant data"
+ valid = op.inputs[1].ops and op.inputs[1].ops[0].type == Op.Const
+ return valid, "Table Op with non-constant table input"