about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFredrik Svedberg <fredrik.svedberg@arm.com>2020-09-29 10:00:39 +0200
committerpatrik.gustavsson <patrik.gustavsson@arm.com>2020-09-30 07:52:39 +0000
commit0f98b361288c71fca327969346db32de098c797b (patch)
tree8b2905a6e763832a0029179d655c481b14e0a8a1
parent0265f402c7ae1e875470298b4130fcc2f7ab4e23 (diff)
downloadethos-u-vela-0f98b361288c71fca327969346db32de098c797b.tar.gz
[MLBEDSW-2802] Fix 5D tensor crash
Fixed crash in networks with 5D tensors. Fixed crash for (int32) tensors without quantization. Added validity checks for concatenation. Moved unfusing of activation function from tflite_reader to graph_optimiser.

Signed-off-by: Fredrik Svedberg <fredrik.svedberg@arm.com>
Change-Id: Ib9ba8891dc95ef5491e15d0feedef44331a26393
-rw-r--r--ethosu/vela/graph_optimiser.py15
-rw-r--r--ethosu/vela/mark_tensors.py2
-rw-r--r--ethosu/vela/npu_serialisation.py2
-rw-r--r--ethosu/vela/register_command_stream_generator.py14
-rw-r--r--ethosu/vela/shared_buffer_allocation.py5
-rw-r--r--ethosu/vela/supported_operators.py25
-rw-r--r--ethosu/vela/tflite_reader.py17
7 files changed, 53 insertions, 27 deletions
diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index 2bd57ddd..81d5a188 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -433,6 +433,20 @@ def fixup_pack_input(op, arch):
return op
+def unfuse_activation_function(op, arch):
+ unfuse_ops = ("ConcatTFLite",)
+ if op.type in unfuse_ops and op.run_on_npu and op.attrs.get("fused_activation_function", None) is not None:
+ act = op.attrs["fused_activation_function"]
+ del op.attrs["fused_activation_function"]
+ act_op = Operation(act, op.name + act)
+ out_tens = op.outputs[0]
+ intermediate_tens = out_tens.clone("_act_intermediate")
+ act_op.set_output_tensor(out_tens)
+ act_op.add_input_tensor(intermediate_tens)
+ op.set_output_tensor(intermediate_tens)
+
+ return op
+
def fixup_unpack_output(tens, arch):
op = tens.ops[0]
if op.type in set(("Unpack", "StridedSlice")):
@@ -1087,6 +1101,7 @@ def optimise_graph_a(nng, arch, verbose_graph=False):
fixup_fully_connected_input,
convert_batched_fc_to_conv,
fixup_pack_input,
+ unfuse_activation_function,
fixup_conv2d_backprop,
fixup_relus_with_differing_ifm_ofm_scaling,
fixup_act_reorder,
diff --git a/ethosu/vela/mark_tensors.py b/ethosu/vela/mark_tensors.py
index 208b5b8c..a971ef23 100644
--- a/ethosu/vela/mark_tensors.py
+++ b/ethosu/vela/mark_tensors.py
@@ -367,6 +367,8 @@ def mark_tensor_format(nng, arch, verbose_tensor_format=False):
visit_tens(tens, ps)
for tens, fmt in formats_for_tensor.items():
+ if len(tens.shape) > 4:
+ continue
tens.set_format(fmt, arch)
if fmt == TensorFormat.WeightsCompressed and tens.values is not None:
src_tens = tens.get_dma_src_tensor()
diff --git a/ethosu/vela/npu_serialisation.py b/ethosu/vela/npu_serialisation.py
index 6277a6dc..430db585 100644
--- a/ethosu/vela/npu_serialisation.py
+++ b/ethosu/vela/npu_serialisation.py
@@ -51,7 +51,7 @@ def copy_compressed_values_to_memory_tensor(memory_tensor, src_tensor):
def copy_ifm_values_to_memory_tensor(memory_tensor, src_tensor):
start_addr = src_tensor.address
- values = src_tensor.quant_values.flatten()
+ values = src_tensor.quant_values.flatten() if src_tensor.quant_values is not None else src_tensor.values.flatten()
if src_tensor.dtype.size_in_bytes() > 1:
values = np.frombuffer(values.tobytes(), dtype=np.uint8)
end_addr = start_addr + values.size
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index acfd25a2..da9be668 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -483,9 +483,9 @@ def generate_register_command_stream(nng, sg, arch, verbose=False):
# Calculate scales needed for arithmetic elementwise operators
if primary_op.type in set(("AddAct", "MulAct", "SubAct",)):
- input_scale = cmd.ifm_tensor.quantization.scale_f32
- input2_scale = cmd.ifm2_tensor.quantization.scale_f32
- output_scale = ofm_quant.scale_f32
+ input_scale = cmd.ifm_tensor.quantization.scale_f32 if cmd.ifm_tensor.quantization else None
+ input2_scale = cmd.ifm2_tensor.quantization.scale_f32 if cmd.ifm2_tensor.quantization else None
+ output_scale = ofm_quant.scale_f32 if ofm_quant else None
use_global_scale = True
if output_scale is not None and faf in ("Sigmoid", "Tanh"):
@@ -803,10 +803,10 @@ def generate_register_command_stream(nng, sg, arch, verbose=False):
scale_region = base_ptr_idx_map[cmd.scale_tensor.mem_type]
emit.cmd0_with_param(cmd0.NPU_SET_SCALE_REGION, scale_region)
- ofm_quant_qmin = ofm_quant.quant_min
- ofm_quant_qmax = ofm_quant.quant_max
- ifm_min = cmd.ifm_tensor.quantization.min
- ifm_max = cmd.ifm_tensor.quantization.max
+ ofm_quant_qmin = ofm_quant.quant_min if ofm_quant else np.iinfo(np.int16).min
+ ofm_quant_qmax = ofm_quant.quant_max if ofm_quant else np.iinfo(np.int16).max
+ ifm_min = cmd.ifm_tensor.quantization.min if cmd.ifm_tensor.quantization else np.iinfo(np.int16).min
+ ifm_max = cmd.ifm_tensor.quantization.max if cmd.ifm_tensor.quantization else np.iinfo(np.int16).max
# Emit commands for any fused activation function
if faf is None:
diff --git a/ethosu/vela/shared_buffer_allocation.py b/ethosu/vela/shared_buffer_allocation.py
index 63e2268d..7657dffa 100644
--- a/ethosu/vela/shared_buffer_allocation.py
+++ b/ethosu/vela/shared_buffer_allocation.py
@@ -38,7 +38,8 @@ class SharedBufferAllocation:
ifm_tensor, ifm2_tensor, weight_tensor, ofm_tensor = ps.get_primary_op_ifm_ifm2_weights_ofm()
tensors = [t for t in (ifm_tensor, ifm2_tensor, ofm_tensor) if t is not None]
- has_scale = None not in (t.quantization.scale_f32 for t in tensors)
+ scales = [t.quantization.scale_f32 for t in tensors if t.quantization is not None]
+ has_scale = len(tensors) == len(scales) and not None in scales
strides = (1, 1, 1, 1)
dilation = (1, 1, 1, 1)
@@ -192,7 +193,7 @@ def find_block_configs_suitable_for_pass_and_shared_buffer(arch, ps):
# Constrain the search space if the OFM is smaller than the max block size
# - Add other block search constraints here if required
- if len(alloc.ofm_tensor.shape) == 2:
+ if len(alloc.ofm_tensor.shape) <= 2:
max_block_height = max_block_width = alloc.ofm_tensor.shape[0]
else:
max_block_width = alloc.ofm_tensor.shape[-2]
diff --git a/ethosu/vela/supported_operators.py b/ethosu/vela/supported_operators.py
index 0a1af829..eec1b900 100644
--- a/ethosu/vela/supported_operators.py
+++ b/ethosu/vela/supported_operators.py
@@ -152,6 +152,9 @@ class SupportedOperators:
"placing on CPU",
)
return False
+ if len(t.shape) > 4:
+ print("Warning:", op.type, "has input(s) of unsupported shape", t.shape, "placing on CPU")
+ return False
for t in op.outputs:
if not t.has_fully_defined_shape():
print("Warning:", op.type, "has output(s) of undefined shape, placing on CPU")
@@ -165,6 +168,9 @@ class SupportedOperators:
"placing on CPU",
)
return False
+ if len(t.shape) > 4:
+ print("Warning:", op.type, "has output(s) of unsupported shape", t.shape, "placing on CPU")
+ return False
# check data type
tensors = [t for t in op.get_ifm_ifm2_weights_ofm() if t is not None]
@@ -447,6 +453,25 @@ class SupportedOperators:
if num_to_be_inferred > 1:
print("Warning:", op.type, "has more than one size to be inferred, which is illegal, placing on CPU")
return False
+ if op.type.find("Concat") != -1:
+ axis = op.attrs.get("axis", None)
+ if axis is None:
+ print("Warning:", op.type, "invalid or missing axis, placing on CPU")
+ return False
+ if axis < 0:
+ axis += len(op.inputs[0].shape)
+ if not 0 < axis < len(op.inputs[0].shape):
+ print("Warning:", op.type, "invalid axis", axis, ", placing on CPU")
+ return False
+ ofm = op.outputs[0]
+ ofm_dims = len(ofm.shape)
+ for ifm in op.inputs:
+ if len(ifm.shape) != ofm_dims:
+ return False
+ for i in range(ofm_dims):
+ if i != axis and ifm.shape[i] != ofm.shape[i]:
+ print("Warning:", op.type, "invalid ifm:", ifm.name, ifm.shape, "mismatch in dimension", i, ", placing on CPU")
+ return False
return True
diff --git a/ethosu/vela/tflite_reader.py b/ethosu/vela/tflite_reader.py
index 7458b907..77cc7963 100644
--- a/ethosu/vela/tflite_reader.py
+++ b/ethosu/vela/tflite_reader.py
@@ -149,8 +149,6 @@ class TFLiteSubgraph:
for out in op.outputs:
out.ops = [op]
- activation_function_to_split_out = None
-
if op_type.startswith("DepthwiseConv2d") or op_type.startswith("Conv2D"):
if inputs[1].values is not None:
inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 2, 3, 0))
@@ -192,21 +190,6 @@ class TFLiteSubgraph:
if "depth_multiplier" in op.attrs:
op.attrs["channel_multiplier"] = op.attrs["depth_multiplier"]
- if "fused_activation_function" in op.attrs:
- if op_type in set(("ConcatTFLite",)):
- act = op.attrs["fused_activation_function"]
- del op.attrs["fused_activation_function"]
- if act is not None:
- activation_function_to_split_out = act
-
- if activation_function_to_split_out is not None:
- act_op = Operation(activation_function_to_split_out, name + activation_function_to_split_out)
- out_tens = op.outputs[0]
- intermediate_tens = out_tens.clone("_act_intermediate")
- act_op.set_output_tensor(out_tens)
- intermediate_tens.ops = [op]
- op.outputs[0] = intermediate_tens
- act_op.inputs = [intermediate_tens]
@staticmethod
def len1_array_to_scalar(arr):