author     Tim Hall <tim.hall@arm.com>  2021-02-04 22:47:46 +0000
committer  Tim Hall <tim.hall@arm.com>  2021-02-05 11:30:49 +0000
commit     73e843f76dd71e4ab5e07a7616c2c4806ca6ac25 (patch)
tree       73c35c5443e041441ba826cacfc12f21d5b30bac
parent     133ba7e39c9517d43690c55197d71733ad0dc38c (diff)
download   ethos-u-vela-73e843f76dd71e4ab5e07a7616c2c4806ca6ac25.tar.gz
vela: Change Shape4D mutability usage
- Removed the requirement to clone shapes when unique values are
required, by forcing top-level immutability. This alleviates issues
with Shapes being unintentionally shared and then mutated as if they
were value types (see the sketch below).
- Shape4D fields can no longer be assigned directly; a modified copy
must be created instead.
Signed-off-by: Tim Hall <tim.hall@arm.com>
Change-Id: Ic0dbfa349eb0215eabefb4f4e2cf99f12d83699c
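
To illustrate the class of bug this commit removes, here is a minimal,
self-contained sketch. The MutableShape stand-in and the values are
illustrative, not vela code; the Shape4D shown is a trimmed copy of the
namedtuple subclass introduced in shape4d.py below:

    from collections import namedtuple

    # Old style: a mutable shape object, silently shared between two ops.
    class MutableShape:
        def __init__(self, n, h, w, c):
            self.batch, self.height, self.width, self.depth = n, h, w, c

    shape = MutableShape(2, 4, 8, 16)
    op_a_shape = shape              # both "ops" hold the *same* object
    op_b_shape = shape
    op_a_shape.height = 64          # op A adjusts its shape in place...
    assert op_b_shape.height == 64  # ...and op B is silently corrupted

    # New style: a namedtuple subclass is immutable, so sharing is safe.
    class Shape4D(namedtuple("Shape4D", ["batch", "height", "width", "depth"])):
        def with_height(self, new_height):
            return Shape4D(self.batch, new_height, self.width, self.depth)

    shape = Shape4D(2, 4, 8, 16)
    try:
        shape.height = 64           # in-place assignment now raises
    except AttributeError:
        pass
    taller = shape.with_height(64)  # "mutation" means building a new value
    assert shape.height == 4 and taller.height == 64

Sharing an immutable value is always safe, which is why the hunks below
can drop every clone() call.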
-rw-r--r--  ethosu/vela/graph_optimiser.py            |  10
-rw-r--r--  ethosu/vela/high_level_command_stream.py  |   2
-rw-r--r--  ethosu/vela/npu_performance.py            |  11
-rw-r--r--  ethosu/vela/pass_packing.py               |   2
-rw-r--r--  ethosu/vela/shape4d.py                    | 102
-rw-r--r--  ethosu/vela/softmax.py                    |   5
6 files changed, 78 insertions(+), 54 deletions(-)
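
Because the new Shape4D subclasses a tuple, call sites also gain plain
indexing: get_dim(axis) becomes shape[axis] throughout, and the in-place
padding arithmetic in npu_performance.py becomes a call to the new add()
helper. A short sketch of both idioms, assuming the post-patch
ethosu.vela.shape4d module is on the import path (the shape values are
illustrative):

    from ethosu.vela.shape4d import Shape4D

    shape = Shape4D(1, 16, 16, 32)

    # get_dim(axis) becomes tuple indexing; negative axes keep working.
    assert shape[1] == 16 and shape[-1] == 32

    # "shape.height += top + bottom" becomes a returned copy via add().
    top, bottom, left, right = 1, 1, 2, 2
    padded = shape.add(0, top + bottom, left + right, 0)
    assert (padded.height, padded.width) == (18, 20)
    assert (shape.height, shape.width) == (16, 16)  # original untouched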
diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index e84e11e9..1e3b1314 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -101,14 +101,14 @@ def rewrite_concat_ops(op, arch):
         new_op.outputs = [ofm]
         new_op.attrs["concat_axis"] = axis_4D
         new_op.attrs["concat_start"] = offset
-        offset += op.ifm_shapes[idx].get_dim(axis_4D)
+        offset += op.ifm_shapes[idx][axis_4D]
         new_op.attrs["concat_end"] = offset
         new_op.run_on_npu = True
         ofm.ops.append(new_op)
         DebugDatabase.add_optimised(op, new_op)
-        new_op.ifm_shapes.append(op.ifm_shapes[idx].clone())
-        new_op.ofm_shapes.append(op.ofm_shapes[0].clone())
+        new_op.ifm_shapes.append(op.ifm_shapes[idx])
+        new_op.ofm_shapes.append(op.ofm_shapes[0])
     assert ofm.shape[axis] == offset

     # If axis corresponds to C-dimension, NHCWB16 can only be used in the output if all the concat_start's are a
@@ -159,7 +159,7 @@ def rewrite_split_ops(tens, arch, nng):
                     ofm_shape_idx = idx
                     break

-            offset_start[axis_4D] += split_op.ofm_shapes[idx].get_dim(axis_4D)
+            offset_start[axis_4D] += split_op.ofm_shapes[idx][axis_4D]

        # If start offset is not a multiple of 16 in the C-dimension, NHCWB16 need to be avoided in the input
        if (offset_start[-1] % 16) != 0:
@@ -171,7 +171,7 @@ def rewrite_split_ops(tens, arch, nng):
        new_op.run_on_npu = True
        new_op.set_output_tensor(tens)
        new_op.ifm_shapes.append(Shape4D(inp.shape))
-        new_op.ofm_shapes.append(split_op.ofm_shapes[ofm_shape_idx].clone())
+        new_op.ofm_shapes.append(split_op.ofm_shapes[ofm_shape_idx])
        DebugDatabase.add_optimised(split_op, new_op)

    return tens
diff --git a/ethosu/vela/high_level_command_stream.py b/ethosu/vela/high_level_command_stream.py
index 9cbda452..c25c023e 100644
--- a/ethosu/vela/high_level_command_stream.py
+++ b/ethosu/vela/high_level_command_stream.py
@@ -197,7 +197,7 @@ class NpuStripe(Command):
        self.pad_top = pad_top
        self.pad_bottom = pad_bottom
        for i in range(len(self.ofm_box.end_coord)):
-            assert self.ofm_box.end_coord[i] <= ps.ofm_shapes[0].get_dim(i)
+            assert self.ofm_box.end_coord[i] <= ps.ofm_shapes[0][i]

    def is_npu_pass_command(self):
        return True
diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py
index 3acd5e6c..5bba3b65 100644
--- a/ethosu/vela/npu_performance.py
+++ b/ethosu/vela/npu_performance.py
@@ -444,8 +444,8 @@ def performance_metrics_for_pass(arch, ps, block_config=None, rewrite_list=None
    npu_block_type = primary_op.type.npu_block_type

    ifm_tensor, _, weight_tensor, ofm_tensor = ps.get_primary_op_ifm_ifm2_weights_ofm()
-    ifm_tensor_shape = ps.primary_op.ifm_shapes[0].clone()
-    ofm_tensor_shape = ps.primary_op.ofm_shapes[0].clone()
+    ifm_tensor_shape = ps.primary_op.ifm_shapes[0]
+    ofm_tensor_shape = ps.primary_op.ofm_shapes[0]
    ofm_block.width = min(ofm_block.width, ofm_tensor_shape.width)
    ofm_block.height = min(ofm_block.height, ofm_tensor_shape.height)
    ofm_block.depth = min(ofm_block.depth, ofm_tensor_shape.depth)
@@ -480,9 +480,10 @@ def performance_metrics_for_pass(arch, ps, block_config=None, rewrite_list=None

    batch_size = ifm_tensor_shape.batch

-    # add in padding
-    ifm_tensor_shape.height += explicit_padding[0] + explicit_padding[2]  # height += top and bottom
-    ifm_tensor_shape.width += explicit_padding[1] + explicit_padding[3]  # width += left and right
+    # add in padding, height += top and bottom, width += left and right
+    ifm_tensor_shape = ifm_tensor_shape.add(
+        0, explicit_padding[0] + explicit_padding[2], explicit_padding[1] + explicit_padding[3], 0
+    )

    if npu_block_type != NpuBlockType.Pooling:
        if npu_block_type == NpuBlockType.ReduceSum:
diff --git a/ethosu/vela/pass_packing.py b/ethosu/vela/pass_packing.py
index c973b9c3..abd235fd 100644
--- a/ethosu/vela/pass_packing.py
+++ b/ethosu/vela/pass_packing.py
@@ -231,7 +231,7 @@ def pack_into_passes(nng, arch, verbose_packing=False):
        ofm_tensor = op.ofm
        if ofm_tensor is None:
            ofm_tensor = op.outputs[0]
-        ofm_shape = op.ofm_shapes[0].clone() if op.run_on_npu else None
+        ofm_shape = op.ofm_shapes[0] if op.run_on_npu else None

        build_pass((op,), ofm_tensor, ofm_shape)
diff --git a/ethosu/vela/shape4d.py b/ethosu/vela/shape4d.py
index 8981e20b..e26389a1 100644
--- a/ethosu/vela/shape4d.py
+++ b/ethosu/vela/shape4d.py
@@ -15,66 +15,90 @@
 # limitations under the License.
 # Description:
 # Defines the class Shape4D.
+from collections import namedtuple
+
 from .numeric_util import full_shape
+from .numeric_util import round_up_divide


-class Shape4D:
+class Shape4D(namedtuple("Shape4D", ["batch", "height", "width", "depth"])):
     """
     4D Shape (in NHWC format)
     """

-    def __init__(self, shape, base=1):
-        assert shape is not None
-        assert len(shape) <= 4
-        self._shape4D = tuple(full_shape(4, shape, base))
+    def __new__(cls, n=1, h=1, w=1, c=1):
+        assert n is not None
+        if isinstance(n, list):
+            assert h == 1 and w == 1 and c == 1
+            tmp = full_shape(4, n, 1)
+            self = super(Shape4D, cls).__new__(cls, tmp[0], tmp[1], tmp[2], tmp[3])
+        else:
+            self = super(Shape4D, cls).__new__(cls, n, h, w, c)
+        return self

-    def __str__(self):
-        return f"<Shape4D {self.as_list()}>"
+    @classmethod
+    def from_list(cls, shape, base=1):
+        tmp = full_shape(4, shape, base)
+        return cls(tmp[0], tmp[1], tmp[2], tmp[3])
+
+    @classmethod
+    def from_hwc(cls, h, w, c):
+        return cls(1, h, w, c)
+
+    def with_batch(self, new_batch):
+        return Shape4D(new_batch, self.height, self.width, self.depth)

-    def __eq__(self, other):
-        return self._shape4D == other._shape4D
+    def with_height(self, new_height):
+        return Shape4D(self.batch, new_height, self.width, self.depth)

-    def clone(self):
-        return Shape4D(self.as_list())
+    def with_width(self, new_width):
+        return Shape4D(self.batch, self.height, new_width, self.depth)

-    @property
-    def batch(self):
-        return self._shape4D[0]
+    def with_hw(self, new_height, new_width):
+        return Shape4D(self.batch, new_height, new_width, self.depth)

-    @property
-    def height(self):
-        return self._shape4D[1]
+    def with_depth(self, new_depth):
+        return Shape4D(self.batch, self.height, self.width, new_depth)

-    @property
-    def width(self):
-        return self._shape4D[2]
+    def add(self, n, h, w, c):
+        return Shape4D(self.batch + n, self.height + h, self.width + w, self.depth + c)

-    @property
-    def depth(self):
-        return self._shape4D[3]
+    def __add__(self, rhs):
+        return Shape4D(self.batch + rhs.batch, self.height + rhs.height, self.width + rhs.width, self.depth + rhs.depth)

-    @batch.setter
-    def batch(self, new_batch):
-        self._shape4D = (new_batch, self._shape4D[1], self._shape4D[2], self._shape4D[3])
+    def __sub__(self, rhs):
+        return Shape4D(self.batch - rhs.batch, self.height - rhs.height, self.width - rhs.width, self.depth - rhs.depth)
+
+    def __floordiv__(self, rhs):
+        return Shape4D(
+            self.batch // rhs.batch, self.height // rhs.height, self.width // rhs.width, self.depth // rhs.depth
+        )
+
+    def __mod__(self, rhs):
+        return Shape4D(self.batch % rhs.batch, self.height % rhs.height, self.width % rhs.width, self.depth % rhs.depth)
+
+    def __str__(self):
+        return f"<Shape4D {list(self)}>"

-    @height.setter
-    def height(self, new_height):
-        self._shape4D = (self._shape4D[0], new_height, self._shape4D[2], self._shape4D[3])
+    def div_round_up(self, rhs):
+        return Shape4D(
+            round_up_divide(self.batch, rhs.batch),
+            round_up_divide(self.height, rhs.height),
+            round_up_divide(self.width, rhs.width),
+            round_up_divide(self.depth, rhs.depth),
+        )

-    @width.setter
-    def width(self, new_width):
-        self._shape4D = (self._shape4D[0], self._shape4D[1], new_width, self._shape4D[3])
+    def elements(self):
+        return self.batch * self.width * self.height * self.depth

-    @depth.setter
-    def depth(self, new_depth):
-        self._shape4D = (self._shape4D[0], self._shape4D[1], self._shape4D[2], new_depth)
+    def elements_wh(self):
+        return self.width * self.height

-    def get_dim(self, dim):
-        assert -4 <= dim < 4
-        return self._shape4D[dim]
+    def is_empty(self):
+        return (self.batch + self.width + self.height + self.depth) == 0

     def as_list(self):
-        return list(self._shape4D)
+        return list(self)

     def get_hw_as_list(self):
         return list([self.height, self.width])
diff --git a/ethosu/vela/softmax.py b/ethosu/vela/softmax.py
index c3b0611a..4418f011 100644
--- a/ethosu/vela/softmax.py
+++ b/ethosu/vela/softmax.py
@@ -216,10 +216,9 @@ class SoftMax:
         # Reshape ifm/ofm (if needed)
         ifm_shape = self.op.ifm_shapes[0]
         if ifm_shape.batch > 1:
-            ifm_shape.height = ifm_shape.batch * ifm_shape.height
-            ifm_shape.batch = 1
+            self.op.ifm_shapes[0] = ifm_shape.with_height(ifm_shape.batch * ifm_shape.height).with_batch(1)
             self.op.ifm.avoid_NHCWB16 = True
-            self.op.ofm_shapes[0] = ifm_shape.clone()
+            self.op.ofm_shapes[0] = self.op.ifm_shapes[0]
             self.op.ofm.avoid_NHCWB16 = True

         if ifm.dtype in (DataType.uint8, DataType.int8) and ofm.dtype == ifm.dtype:
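
Finally, a usage sketch of the replication-style helpers the softmax.py
hunk relies on, again assuming the post-patch ethosu.vela.shape4d module
is importable (the shape values are illustrative):

    from ethosu.vela.shape4d import Shape4D

    ifm_shape = Shape4D([2, 4, 8, 16])  # a list is left-padded with 1s to N, H, W, C

    # Fold batch into height by chaining with_*() copies, as softmax.py does.
    folded = ifm_shape.with_height(ifm_shape.batch * ifm_shape.height).with_batch(1)
    assert folded == Shape4D(1, 8, 8, 16)
    assert ifm_shape == Shape4D(2, 4, 8, 16)  # the original operand is untouched

    # The elementwise operators and div_round_up() likewise return new shapes.
    block = Shape4D(1, 2, 2, 8)
    print(folded.div_round_up(block))         # <Shape4D [1, 4, 4, 2]>

Because every operation returns a fresh value, assigning the same object
to both op.ifm_shapes[0] and op.ofm_shapes[0], as the softmax hunk does,
is safe.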