From 0f98b361288c71fca327969346db32de098c797b Mon Sep 17 00:00:00 2001
From: Fredrik Svedberg
Date: Tue, 29 Sep 2020 10:00:39 +0200
Subject: [MLBEDSW-2802] Fix 5D tensor crash

Fixed crash in networks with 5D tensors.
Fixed crash for (int32) tensors without quantization.
Added validity checks for concatenation.
Moved unfusing of activation function from tflite_reader to
graph_optimiser.

Signed-off-by: Fredrik Svedberg
Change-Id: Ib9ba8891dc95ef5491e15d0feedef44331a26393
---
 ethosu/vela/graph_optimiser.py                   | 15 ++++++++++++++
 ethosu/vela/mark_tensors.py                      |  2 ++
 ethosu/vela/npu_serialisation.py                 |  2 +-
 ethosu/vela/register_command_stream_generator.py | 14 ++++++-------
 ethosu/vela/shared_buffer_allocation.py          |  5 +++--
 ethosu/vela/supported_operators.py               | 25 ++++++++++++++++++++++++
 ethosu/vela/tflite_reader.py                     | 17 ----------------
 7 files changed, 53 insertions(+), 27 deletions(-)

diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index 2bd57ddd..81d5a188 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -433,6 +433,20 @@ def fixup_pack_input(op, arch):
     return op
 
 
+def unfuse_activation_function(op, arch):
+    unfuse_ops = ("ConcatTFLite",)
+    if op.type in unfuse_ops and op.run_on_npu and op.attrs.get("fused_activation_function", None) is not None:
+        act = op.attrs["fused_activation_function"]
+        del op.attrs["fused_activation_function"]
+        act_op = Operation(act, op.name + act)
+        out_tens = op.outputs[0]
+        intermediate_tens = out_tens.clone("_act_intermediate")
+        act_op.set_output_tensor(out_tens)
+        act_op.add_input_tensor(intermediate_tens)
+        op.set_output_tensor(intermediate_tens)
+
+    return op
+
 def fixup_unpack_output(tens, arch):
     op = tens.ops[0]
     if op.type in set(("Unpack", "StridedSlice")):
@@ -1087,6 +1101,7 @@ def optimise_graph_a(nng, arch, verbose_graph=False):
         fixup_fully_connected_input,
         convert_batched_fc_to_conv,
         fixup_pack_input,
+        unfuse_activation_function,
         fixup_conv2d_backprop,
         fixup_relus_with_differing_ifm_ofm_scaling,
         fixup_act_reorder,
diff --git a/ethosu/vela/mark_tensors.py b/ethosu/vela/mark_tensors.py
index 208b5b8c..a971ef23 100644
--- a/ethosu/vela/mark_tensors.py
+++ b/ethosu/vela/mark_tensors.py
@@ -367,6 +367,8 @@ def mark_tensor_format(nng, arch, verbose_tensor_format=False):
                 visit_tens(tens, ps)
 
     for tens, fmt in formats_for_tensor.items():
+        if len(tens.shape) > 4:
+            continue
         tens.set_format(fmt, arch)
         if fmt == TensorFormat.WeightsCompressed and tens.values is not None:
             src_tens = tens.get_dma_src_tensor()
diff --git a/ethosu/vela/npu_serialisation.py b/ethosu/vela/npu_serialisation.py
index 6277a6dc..430db585 100644
--- a/ethosu/vela/npu_serialisation.py
+++ b/ethosu/vela/npu_serialisation.py
@@ -51,7 +51,7 @@ def copy_compressed_values_to_memory_tensor(memory_tensor, src_tensor):
 
 def copy_ifm_values_to_memory_tensor(memory_tensor, src_tensor):
     start_addr = src_tensor.address
-    values = src_tensor.quant_values.flatten()
+    values = src_tensor.quant_values.flatten() if src_tensor.quant_values is not None else src_tensor.values.flatten()
     if src_tensor.dtype.size_in_bytes() > 1:
         values = np.frombuffer(values.tobytes(), dtype=np.uint8)
     end_addr = start_addr + values.size
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index acfd25a2..da9be668 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -483,9 +483,9 @@ def generate_register_command_stream(nng, sg, arch, verbose=False):
                 # Calculate scales needed for arithmetic elementwise operators
                 if primary_op.type in set(("AddAct", "MulAct", "SubAct",)):
-                    input_scale = cmd.ifm_tensor.quantization.scale_f32
-                    input2_scale = cmd.ifm2_tensor.quantization.scale_f32
-                    output_scale = ofm_quant.scale_f32
+                    input_scale = cmd.ifm_tensor.quantization.scale_f32 if cmd.ifm_tensor.quantization else None
+                    input2_scale = cmd.ifm2_tensor.quantization.scale_f32 if cmd.ifm2_tensor.quantization else None
+                    output_scale = ofm_quant.scale_f32 if ofm_quant else None
                     use_global_scale = True
 
                     if output_scale is not None and faf in ("Sigmoid", "Tanh"):
@@ -803,10 +803,10 @@ def generate_register_command_stream(nng, sg, arch, verbose=False):
                     scale_region = base_ptr_idx_map[cmd.scale_tensor.mem_type]
                     emit.cmd0_with_param(cmd0.NPU_SET_SCALE_REGION, scale_region)
 
-            ofm_quant_qmin = ofm_quant.quant_min
-            ofm_quant_qmax = ofm_quant.quant_max
-            ifm_min = cmd.ifm_tensor.quantization.min
-            ifm_max = cmd.ifm_tensor.quantization.max
+            ofm_quant_qmin = ofm_quant.quant_min if ofm_quant else np.iinfo(np.int16).min
+            ofm_quant_qmax = ofm_quant.quant_max if ofm_quant else np.iinfo(np.int16).max
+            ifm_min = cmd.ifm_tensor.quantization.min if cmd.ifm_tensor.quantization else np.iinfo(np.int16).min
+            ifm_max = cmd.ifm_tensor.quantization.max if cmd.ifm_tensor.quantization else np.iinfo(np.int16).max
 
             # Emit commands for any fused activation function
             if faf is None:
diff --git a/ethosu/vela/shared_buffer_allocation.py b/ethosu/vela/shared_buffer_allocation.py
index 63e2268d..7657dffa 100644
--- a/ethosu/vela/shared_buffer_allocation.py
+++ b/ethosu/vela/shared_buffer_allocation.py
@@ -38,7 +38,8 @@ class SharedBufferAllocation:
         ifm_tensor, ifm2_tensor, weight_tensor, ofm_tensor = ps.get_primary_op_ifm_ifm2_weights_ofm()
 
         tensors = [t for t in (ifm_tensor, ifm2_tensor, ofm_tensor) if t is not None]
-        has_scale = None not in (t.quantization.scale_f32 for t in tensors)
+        scales = [t.quantization.scale_f32 for t in tensors if t.quantization is not None]
+        has_scale = len(tensors) == len(scales) and None not in scales
 
         strides = (1, 1, 1, 1)
         dilation = (1, 1, 1, 1)
@@ -192,7 +193,7 @@ def find_block_configs_suitable_for_pass_and_shared_buffer(arch, ps):
 
     # Constrain the search space if the OFM is smaller than the max block size
     #   - Add other block search constraints here if required
-    if len(alloc.ofm_tensor.shape) == 2:
+    if len(alloc.ofm_tensor.shape) <= 2:
         max_block_height = max_block_width = alloc.ofm_tensor.shape[0]
     else:
         max_block_width = alloc.ofm_tensor.shape[-2]
diff --git a/ethosu/vela/supported_operators.py b/ethosu/vela/supported_operators.py
index 0a1af829..eec1b900 100644
--- a/ethosu/vela/supported_operators.py
+++ b/ethosu/vela/supported_operators.py
@@ -152,6 +152,9 @@ class SupportedOperators:
                     "placing on CPU",
                 )
                 return False
+            if len(t.shape) > 4:
+                print("Warning:", op.type, "has input(s) of unsupported shape", t.shape, "placing on CPU")
+                return False
         for t in op.outputs:
             if not t.has_fully_defined_shape():
                 print("Warning:", op.type, "has output(s) of undefined shape, placing on CPU")
@@ -165,6 +168,9 @@ class SupportedOperators:
                     "placing on CPU",
                 )
                 return False
+            if len(t.shape) > 4:
+                print("Warning:", op.type, "has output(s) of unsupported shape", t.shape, "placing on CPU")
+                return False
 
         # check data type
         tensors = [t for t in op.get_ifm_ifm2_weights_ofm() if t is not None]
@@ -447,6 +453,25 @@ class SupportedOperators:
         if num_to_be_inferred > 1:
             print("Warning:", op.type, "has more than one size to be inferred, which is illegal, placing on CPU")
             return False
+        if op.type.find("Concat") != -1:
+            axis = op.attrs.get("axis", None)
+            if axis is None:
+                print("Warning:", op.type, "invalid or missing axis, placing on CPU")
+                return False
+            if axis < 0:
+                axis += len(op.inputs[0].shape)
+            if not 0 < axis < len(op.inputs[0].shape):
+                print("Warning:", op.type, "invalid axis", axis, ", placing on CPU")
+                return False
+            ofm = op.outputs[0]
+            ofm_dims = len(ofm.shape)
+            for ifm in op.inputs:
+                if len(ifm.shape) != ofm_dims:
+                    return False
+                for i in range(ofm_dims):
+                    if i != axis and ifm.shape[i] != ofm.shape[i]:
+                        print("Warning:", op.type, "invalid ifm:", ifm.name, ifm.shape, "mismatch in dimension", i, ", placing on CPU")
+                        return False
 
         return True
 
diff --git a/ethosu/vela/tflite_reader.py b/ethosu/vela/tflite_reader.py
index 7458b907..77cc7963 100644
--- a/ethosu/vela/tflite_reader.py
+++ b/ethosu/vela/tflite_reader.py
@@ -149,8 +149,6 @@ class TFLiteSubgraph:
         for out in op.outputs:
             out.ops = [op]
 
-        activation_function_to_split_out = None
-
         if op_type.startswith("DepthwiseConv2d") or op_type.startswith("Conv2D"):
             if inputs[1].values is not None:
                 inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 2, 3, 0))
@@ -192,21 +190,6 @@ class TFLiteSubgraph:
         if "depth_multiplier" in op.attrs:
             op.attrs["channel_multiplier"] = op.attrs["depth_multiplier"]
 
-        if "fused_activation_function" in op.attrs:
-            if op_type in set(("ConcatTFLite",)):
-                act = op.attrs["fused_activation_function"]
-                del op.attrs["fused_activation_function"]
-                if act is not None:
-                    activation_function_to_split_out = act
-
-        if activation_function_to_split_out is not None:
-            act_op = Operation(activation_function_to_split_out, name + activation_function_to_split_out)
-            out_tens = op.outputs[0]
-            intermediate_tens = out_tens.clone("_act_intermediate")
-            act_op.set_output_tensor(out_tens)
-            intermediate_tens.ops = [op]
-            op.outputs[0] = intermediate_tens
-            act_op.inputs = [intermediate_tens]
 
     @staticmethod
     def len1_array_to_scalar(arr):
-- 
cgit v1.2.1
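
Appendix (not part of the patch): the concatenation validity check added to supported_operators.py above, read as a standalone sketch. SimpleNamespace objects are hypothetical stand-ins for vela's Operation and Tensor classes; only the attributes the check actually touches (type, attrs, inputs, outputs, name, shape) are assumed, and the CPU-fallback warnings are elided into comments.

from types import SimpleNamespace


def concat_is_supported(op):
    if op.type.find("Concat") == -1:
        return True  # not a concatenation; nothing to check
    axis = op.attrs.get("axis", None)
    if axis is None:
        return False  # the axis attribute is mandatory
    if axis < 0:
        axis += len(op.inputs[0].shape)  # normalise a negative axis against the first input's rank
    # the axis must index a non-batch dimension of the first input
    if not 0 < axis < len(op.inputs[0].shape):
        return False
    ofm = op.outputs[0]
    ofm_dims = len(ofm.shape)
    for ifm in op.inputs:
        if len(ifm.shape) != ofm_dims:
            return False  # input rank must match output rank
        for i in range(ofm_dims):
            # every dimension except the concat axis must match the output
            if i != axis and ifm.shape[i] != ofm.shape[i]:
                return False
    return True


tens = lambda name, shape: SimpleNamespace(name=name, shape=shape)
good = SimpleNamespace(
    type="ConcatTFLite",
    attrs={"axis": 3},
    inputs=[tens("a", [1, 8, 8, 3]), tens("b", [1, 8, 8, 5])],
    outputs=[tens("out", [1, 8, 8, 8])],
)
assert concat_is_supported(good)
bad = SimpleNamespace(
    type="ConcatTFLite",
    attrs={"axis": 0},  # concatenation along the batch dimension is rejected
    inputs=[tens("a", [1, 8, 8, 3])],
    outputs=[tens("out", [1, 8, 8, 3])],
)
assert not concat_is_supported(bad)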
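
The int32 fix applies one guard pattern throughout the patch: a tensor without quantization carries None in its quantization field, so every scale_f32/min/max read is made conditional, falling back to None for scales and to int16 limits for clamping bounds. A minimal sketch of that pattern follows, with SimpleNamespace again standing in for vela's Tensor and QuantizationParameters classes.

from types import SimpleNamespace

import numpy as np


def scale_or_none(tensor):
    # read the scale only when a quantization record exists
    return tensor.quantization.scale_f32 if tensor.quantization else None


def clamp_bounds(quant):
    # fall back to the int16 range when there is no quantization,
    # mirroring the register command stream generator above
    qmin = quant.quant_min if quant else np.iinfo(np.int16).min
    qmax = quant.quant_max if quant else np.iinfo(np.int16).max
    return qmin, qmax


quantised = SimpleNamespace(quantization=SimpleNamespace(scale_f32=0.05, quant_min=-128, quant_max=127))
unquantised = SimpleNamespace(quantization=None)  # e.g. an int32 tensor
assert scale_or_none(quantised) == 0.05
assert scale_or_none(unquantised) is None
assert clamp_bounds(unquantised.quantization) == (-32768, 32767)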