From 9a03fdff316662be69a1adc4e391e43bc6519b08 Mon Sep 17 00:00:00 2001 From: Charles Xu Date: Thu, 2 Jul 2020 15:12:40 +0200 Subject: MLBEDSW-2569:Support 1x1 IFM ResizeBilinear Signed-off-by: Charles Xu Change-Id: I44428d77b2e8e44a477e5c4dfe28ab8dd1792838 --- ethosu/vela/graph_optimiser.py | 38 ++++++++++++++++++++++-- ethosu/vela/npu_serialisation.py | 5 ++-- ethosu/vela/register_command_stream_generator.py | 17 ++++++++++- ethosu/vela/supported_operators.py | 2 ++ 4 files changed, 57 insertions(+), 5 deletions(-) diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py index c805be50..355b16ff 100644 --- a/ethosu/vela/graph_optimiser.py +++ b/ethosu/vela/graph_optimiser.py @@ -27,6 +27,7 @@ from .ethos_u55_regs.ethos_u55_regs import resampling_mode from .numeric_util import full_shape from .operation import NpuBlockType from .operation import Operation +from .tensor import QuantizationParameters from .tensor import Tensor passthrough_nodes = set(("Identity",)) @@ -181,6 +182,39 @@ def fixup_conv2d_backprop(op, arch): return op +# Convert the op to an elementwise add +def convert_resizebilinear_1x1_to_add(op): + op.type = "AddAct" + op.name = op.name + "_add" + op.attrs.update({"npu_block_type": NpuBlockType.ElementWise}) + op.attrs["resizebilinear"] = True + # Create an input tensor filled with zeros + shape = op.outputs[0].shape + tens = Tensor(shape, op.inputs[0].dtype, op.inputs[1].name + "_add") + tens.values = np.zeros(shape) + tens.quant_values = np.zeros(shape, np.uint8) + tens.quantization = QuantizationParameters(0.0, 255.0) + tens.quantization.scale_f32 = 1.0 + tens.quantization.zero_point = 0 + tens.consumer_list = [op] + tens_op = op.inputs[1].ops[0] + tens_op.outputs = [tens] + tens.ops = [tens_op] + # Set the add inputs + op.inputs[1] = op.inputs[0] + op.inputs[0] = tens + + return op + + +def fixup_resizebilinear(op, arch): + if op.type == "ResizeBilinear": + if op.inputs[0].shape[1] == 1 and op.inputs[0].shape[2] == 1: + 
convert_resizebilinear_1x1_to_add(op) + + return op + + def fixup_fully_connected_input(op, arch): if op.type == "FullyConnectedAct": inp = op.inputs[0] @@ -614,8 +648,7 @@ def add_attrs_to_resizebilinear(op, arch): # produce a (M * 2 - 1, N * 2 - 1) sized output op.attrs["padding"] = b"VALID" else: - # If this exception is raised, something is wrong with the supported op check - raise UnsupportedFeatureError("Unsupported upscaling factor") + return op input_tensor.resampling_mode = resampling_mode.NEAREST op.attrs.update({"strides": (1, 1, 1, 1), "ksize": (1, 2, 2, 1)}) return op @@ -647,6 +680,7 @@ def optimise_graph_a(nng, arch, verbose_graph=False): mark_npu_block_type, fixup_elementwise_with_scalars, reorder_depthwise_weights, + fixup_resizebilinear, # convert_mul_max_to_abs_or_lrelu # TODO: enable optimisation once quantisation issues are resolved ] diff --git a/ethosu/vela/npu_serialisation.py b/ethosu/vela/npu_serialisation.py index 2d1c6b10..4b5a888f 100644 --- a/ethosu/vela/npu_serialisation.py +++ b/ethosu/vela/npu_serialisation.py @@ -51,8 +51,9 @@ def copy_compressed_values_to_memory_tensor(memory_tensor, src_tensor): def copy_ifm_values_to_memory_tensor(memory_tensor, src_tensor): start_addr = src_tensor.address - end_addr = start_addr + src_tensor.quant_values.size - memory_tensor.values[start_addr:end_addr] = src_tensor.quant_values + values = src_tensor.quant_values.flatten() + end_addr = start_addr + values.size + memory_tensor.values[start_addr:end_addr] = values def serialise_npu_subgraph_into_tensors(nng, sg, arch, scratch_tens, scratch_fast_tens, flash_tens): diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py index d32766b0..38b40ba5 100644 --- a/ethosu/vela/register_command_stream_generator.py +++ b/ethosu/vela/register_command_stream_generator.py @@ -478,6 +478,11 @@ def generate_register_command_stream(nng, sg, arch, verbose=False): if (faf == "Sigmoid") or (faf == "Tanh"): 
output_scale = 1 / 0x3000 + # Force output scale same as the input scale for + # resizebilinear 1x1 that is converted to add + if "resizebilinear" in primary_op.attrs: + output_scale = input2_scale + if input_scale == input2_scale: opa_scale, opb_scale, ofm_scale, shift = scaling.simplified_elementwise_add_sub_scale( input_scale, input2_scale, output_scale @@ -860,7 +865,17 @@ def generate_register_command_stream(nng, sg, arch, verbose=False): emit.cmd0_with_param(zero_point_op, 0) else: assert tens.quantization.zero_point is not None, "need an actual zero point set" - emit.cmd0_with_param(zero_point_op, int(tens.quantization.zero_point)) + if ( + "resizebilinear" in primary_op.attrs + and primary_op.type == "AddAct" + and cmd0.NPU_SET_OFM_ZERO_POINT == zero_point_op + ): + # Force output zero point same as the input zero point + # for resizebilinear 1x1 that is converted to add + zero_point = cmd.ifm2_tensor.quantization.zero_point + else: + zero_point = tens.quantization.zero_point + emit.cmd0_with_param(zero_point_op, int(zero_point)) if tens.shape == []: # Empty shape, elementwise constant diff --git a/ethosu/vela/supported_operators.py b/ethosu/vela/supported_operators.py index e8e8d852..3ec3429a 100644 --- a/ethosu/vela/supported_operators.py +++ b/ethosu/vela/supported_operators.py @@ -236,6 +236,8 @@ class SupportedOperators: def check_resize_restrictions(self, op): # check unsupported upscaling factor if op.type == "ResizeBilinear": + if op.inputs[0].shape[1] == 1 and op.inputs[0].shape[2] == 1: + return True upscaled_shape = [op.inputs[0].shape[1] * 2, op.inputs[0].shape[2] * 2] out_shape = op.outputs[0].shape[1:3] if not op.attrs["align_corners"] and out_shape != upscaled_shape: -- cgit v1.2.1