From bf31d647dc5df47410ee577b12427ddf076d816b Mon Sep 17 00:00:00 2001 From: Patrik Gustavsson Date: Wed, 16 Dec 2020 13:08:06 +0100 Subject: MLBEDSW-3645 4D class for op ifm/ofm shapes Add 4D shape class for op Ifm/ofm shapes Signed-off-by: Patrik Gustavsson Change-Id: Ic0a98da9d2f9d085605e39a9ab5a26bad6e702a3 --- ethosu/vela/high_level_command_stream.py | 122 +++++++------------------------ 1 file changed, 26 insertions(+), 96 deletions(-) (limited to 'ethosu/vela/high_level_command_stream.py') diff --git a/ethosu/vela/high_level_command_stream.py b/ethosu/vela/high_level_command_stream.py index bb4f1424..9cbda452 100644 --- a/ethosu/vela/high_level_command_stream.py +++ b/ethosu/vela/high_level_command_stream.py @@ -15,11 +15,14 @@ # limitations under the License. # Description: # Contains classes that hold commands for the high-level command stream (one command per DMA or NPU stripe). +from typing import List + import numpy as np from .architecture_features import Block from .numeric_util import round_up_divide from .operation import NpuBlockType +from .shape4d import Shape4D class Box: @@ -32,15 +35,15 @@ class Box: def transform_with_strides_and_skirt( self, - strides, - skirt, - ifm_shape, - npu_block_type, - concat_axis=0, - concat_offset=0, - split_offset=None, - k_height=1, - upscaling_factor=1, + strides: List[int], + skirt: List[int], + ifm_shape: Shape4D, + npu_block_type: NpuBlockType, + concat_axis: int = 0, + concat_offset: int = 0, + split_offset: int = None, + k_height: int = 1, + upscaling_factor: int = 1, ): new_start_coord = list(self.start_coord) new_end_coord = list(self.end_coord) @@ -58,15 +61,15 @@ class Box: ): # these types of operations do a "dot product" or sum over the entire IFM new_start_coord[-1] = 0 - new_end_coord[-1] = ifm_shape[-1] + new_end_coord[-1] = ifm_shape.depth - if npu_block_type == NpuBlockType.ElementWise and min(len(new_end_coord), len(ifm_shape)) >= 1: - new_end_coord[-1] = min(new_end_coord[-1], ifm_shape[-1]) - if min(len(new_end_coord), len(ifm_shape)) >= 2: - new_end_coord[-2] = min(new_end_coord[-2], ifm_shape[-2] * upscaling_factor) - if min(len(new_end_coord), len(ifm_shape)) >= 3: + if npu_block_type == NpuBlockType.ElementWise and len(new_end_coord) >= 1: + new_end_coord[-1] = min(new_end_coord[-1], ifm_shape.depth) + if len(new_end_coord) >= 2: + new_end_coord[-2] = min(new_end_coord[-2], ifm_shape.width * upscaling_factor) + if len(new_end_coord) >= 3: original_end_coord = list(new_end_coord) - new_end_coord[-3] = min(new_end_coord[-3], ifm_shape[-3] * upscaling_factor) + new_end_coord[-3] = min(new_end_coord[-3], ifm_shape.height * upscaling_factor) pad_top = 0 pad_bottom = 0 @@ -74,7 +77,7 @@ class Box: if len(new_start_coord) >= 2: stride = strides[2] new_start_coord[-2] = max(new_start_coord[-2] * stride - skirt[1], 0) - new_end_coord[-2] = min(new_end_coord[-2] * stride + skirt[3], ifm_shape[-2]) + new_end_coord[-2] = min(new_end_coord[-2] * stride + skirt[3], ifm_shape.width) if len(new_start_coord) >= 3: stride = strides[1] @@ -86,23 +89,20 @@ class Box: pad_top = max(0, 0 - new_start_coord[-3]) + skirt_top_remainder new_start_coord[-3] = max(new_start_coord[-3], 0) - while len(ifm_shape) < 3: - ifm_shape = [1] + ifm_shape - - if (new_end_coord[-3] * stride + skirt[2]) > (ifm_shape[-3] * upscaling_factor): + if (new_end_coord[-3] * stride + skirt[2]) > (ifm_shape.height * upscaling_factor): # pad_bottom is calculated based the diff between the end position of the weight kernel, # after last stride and the ifm height. - if upscaling_factor != 1 and original_end_coord[-3] > ifm_shape[-3] * upscaling_factor: + if upscaling_factor != 1 and original_end_coord[-3] > ifm_shape.height * upscaling_factor: # Special case for Transpose Convolution with VALID padding. - pad_bottom = original_end_coord[-3] - (ifm_shape[-3] * upscaling_factor) + pad_bottom = original_end_coord[-3] - (ifm_shape.height * upscaling_factor) else: k_start = new_start_coord[-3] - pad_top - pad_bottom = max(0, k_start + total_stride + k_height - (ifm_shape[-3] * upscaling_factor)) + pad_bottom = max(0, k_start + total_stride + k_height - (ifm_shape.height * upscaling_factor)) # Adjust for upscaling new_start_coord[-3] = max(new_start_coord[-3] // upscaling_factor, 0) new_end_coord[-3] = new_end_coord[-3] * stride + skirt[2] + (skirt[2] % upscaling_factor) - new_end_coord[-3] = max(min(new_end_coord[-3] // upscaling_factor, ifm_shape[-3]), 1) + new_end_coord[-3] = max(min(new_end_coord[-3] // upscaling_factor, ifm_shape.height), 1) return Box(new_start_coord, new_end_coord), pad_top, pad_bottom @@ -197,7 +197,7 @@ class NpuStripe(Command): self.pad_top = pad_top self.pad_bottom = pad_bottom for i in range(len(self.ofm_box.end_coord)): - assert self.ofm_box.end_coord[i] <= ps.ofm_shapes[0][i] + assert self.ofm_box.end_coord[i] <= ps.ofm_shapes[0].get_dim(i) def is_npu_pass_command(self): return True @@ -251,76 +251,6 @@ class NpuStripe(Command): assert res >= 0 return res - def get_single_block_command(self, block_idx): - block_cfg = (self.block_config[0], self.block_config[1], self.block_config[3]) - dims = self.get_block_dimensions() - strides = dims[1] * dims[2], dims[2], 1 - coord = [] - idx_left = block_idx - for s in strides: - c = idx_left // s - idx_left -= c * s - coord.append(c) - - assert idx_left == 0 - - # put in dummy height/widths in case we're dealing with FC layers - ofm_start = list(self.ofm_box.start_coord) - ofm_end = list(self.ofm_box.end_coord) - - # cut out a nice block shape - for idx in (-1, -2, -3): - if len(ofm_start) >= -idx: - ofm_start[idx] += block_cfg[idx] * coord[idx] - ofm_end[idx] = min(ofm_end[idx], ofm_start[idx] + block_cfg[idx]) - - ps = self.ps - strides = None - skirt = None - if ps.primary_op is not None: - strides = ps.primary_op.attrs.get("strides", None) - skirt = ps.primary_op.attrs.get("skirt", None) - npu_block_type = ps.npu_block_type - - ofm_box = Box(ofm_start, ofm_end) - ifm_box, _, _ = ofm_box.transform_with_strides_and_skirt( - strides, skirt, self.ifm_tensor.shape, npu_block_type, self.concat_axis, self.concat_offset - ) - - weight_box = None - if self.weight_tensor is not None: - weight_oc_start = ofm_start[-1] - weight_oc_end = ofm_end[-1] - if self.concat_axis - len(self.weight_tensor.shape) == -1: - weight_oc_start -= self.concat_offset - weight_oc_end -= self.concat_offset - - weight_box = Box.make_weight_box( - self.weight_tensor.shape, - npu_block_type, - weight_oc_start, - weight_oc_end, - self.weight_tensor.weight_transpose_depthwise, - ) - - return NpuStripe( - self.ps, - self.block_config, - self.is_first, - self.is_last, - self.is_first_h_stripe, - self.is_last_h_stripe, - self.ifm_tensor, - ifm_box, - self.ofm_tensor, - ofm_box, - self.weight_tensor, - weight_box, - self.scale_tensor, - self.concat_axis, - self.concat_offset, - ) - class DMA(Command): def __init__(self, ps, in_tensor, out_tensor, box): -- cgit v1.2.1