From 1e17018d1aabff6b2a4cc5e8e3758678347b84c5 Mon Sep 17 00:00:00 2001
From: Louis Verhaard
Date: Thu, 26 Nov 2020 11:42:04 +0100
Subject: MLBEDSW-3643: Refactor blockdep calculation

Moved blockdep calculation and other helper functions for code
generation to a separate file.

Change-Id: I2f8ccea478654272ebf42217fc5c1800e9ad177a
Signed-off-by: Louis Verhaard
---
 ethosu/vela/high_level_command_to_npu_op.py | 55 +++++++++++++++++------------
 1 file changed, 33 insertions(+), 22 deletions(-)

diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py
index efd8a03d..7db4931d 100644
--- a/ethosu/vela/high_level_command_to_npu_op.py
+++ b/ethosu/vela/high_level_command_to_npu_op.py
@@ -32,7 +32,6 @@ from .api import NpuDmaOperation
 from .api import NpuElementWiseOp
 from .api import NpuElementWiseOperation
 from .api import NpuFeatureMap
-from .api import NpuKernel
 from .api import NpuLayout
 from .api import NpuOperation
 from .api import NpuPadding
@@ -46,15 +45,20 @@ from .api import NpuTileBox
 from .architecture_features import ArchitectureFeatures
 from .architecture_features import Block
 from .data_type import DataType
+from .debug_database import DebugDatabase
 from .high_level_command_stream import Box
 from .high_level_command_stream import Command
 from .high_level_command_stream import CommandType
 from .high_level_command_stream import DMA
 from .high_level_command_stream import NpuStripe
-from .operation import Kernel
 from .operation import NpuBlockType
 from .operation import Op
 from .operation import Operation
+from .register_command_stream_generator import generate_command_stream
+from .register_command_stream_util import BASE_PTR_INDEX_MEM2MEM
+from .register_command_stream_util import is_dma_op
+from .register_command_stream_util import to_npu_kernel
+from .register_command_stream_util import UNARY_ELEMWISE_OPS
 from .tensor import MemType
 from .tensor import Tensor
 from .tensor import TensorBlockTraversal
@@ -62,14 +66,10 @@ from .tensor import TensorFormat
 from .tensor import TensorPurpose


-unary_elementwise_ops = set((NpuElementWiseOp.ABS, NpuElementWiseOp.LRELU, NpuElementWiseOp.CLZ,))
-
-
 class BasePointerIndex(IntEnum):
     WeightTensor = 0  # base address index for the Weight tensor
     ScratchTensor = 1  # base address index for the Scratch_tensor in the TensorArena
     ScratchFastTensor = 2  # base address for the Scratch_fast_tensor
-    Mem2Mem = (1 << 8) | (3 << 0)  # base address slot for memory 2 memory transfer


 dtype_map = {
@@ -102,20 +102,6 @@ elementwise_op_map = {
 }


-def to_npu_kernel(kernel: Kernel) -> NpuKernel:
-    """Converts the given internally used kernel object to NpuKernel (of public API)"""
-    return NpuKernel(
-        kernel.width, kernel.height, kernel.stride.x, kernel.stride.y, kernel.dilation.x, kernel.dilation.y
-    )
-
-
-def to_kernel(kernel: Optional[NpuKernel]) -> Kernel:
-    """Converts the given public API object to Kernel (used internally)"""
-    if kernel is None:
-        return Kernel(1, 1)
-    return Kernel(kernel.width, kernel.height, kernel.stride_x, kernel.stride_y, kernel.dilation_x, kernel.dilation_y)
-
-
 def ifm_ifm2_correct_order(ifm_shape: List[int], ifm2_shape: List[int]) -> bool:
     if ifm_shape == []:
         # Scalar needs to be in IFM2
@@ -412,7 +398,7 @@ def create_npu_elementwise_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> Npu
     assert op.type in elementwise_op_map, f"Unknown elementwise type {op.type}"
     elemwise_op = elementwise_op_map[op.type]
     npu_op = NpuElementWiseOperation(elemwise_op)
-    if elemwise_op not in unary_elementwise_ops:
+    if elemwise_op not in UNARY_ELEMWISE_OPS:
         if not ifm_ifm2_correct_order(cmd.ifm_tensor.shape, cmd.ifm2_tensor.shape):
             # The scalar/broadcasted feature map has to be the ifm2 tensor so switch the ifms
             cmd.ifm_tensor, cmd.ifm2_tensor = cmd.ifm2_tensor, cmd.ifm_tensor
@@ -452,7 +438,7 @@ def create_dma_op(cmd: DMA, arch: ArchitectureFeatures) -> NpuDmaOperation:
     """Converts the command to NpuDmaOperation"""
     src_region = get_region(cmd.in_tensor, arch)
     if cmd.out_tensor.purpose == TensorPurpose.LUT:
-        dest_region = BasePointerIndex.Mem2Mem
+        dest_region = BASE_PTR_INDEX_MEM2MEM
     else:
         dest_region = get_region(cmd.out_tensor, arch)

@@ -492,3 +478,28 @@ def convert_command_to_npu_op(cmd: Command, arch: ArchitectureFeatures) -> NpuOp
     # add a link to the high level command for debugging purposes
     npu_op.cmd = cmd
     return npu_op
+
+
+def generate_register_command_stream_for_sg(nng, sg, arch, verbose=False):
+    """Generates command stream for the subgraph, adds it to sg.register_command_stream"""
+    # Convert high level command stream to list of NpuOperation
+    npu_op_list = []
+    npu_op_to_cmd = dict()  # map from npu op to high level command
+    for cmd in sg.high_level_command_stream:
+        if cmd.cmdtype == CommandType.NpuStripe and cmd.ps.npu_block_type == NpuBlockType.Default:
+            print("Warning: Skipping register command stream generation for", cmd.ps)
+        else:
+            npu_op = convert_command_to_npu_op(cmd, arch)
+            npu_op_list.append(npu_op)
+            npu_op_to_cmd[npu_op] = cmd
+    # Generate register commands
+    stream_id = DebugDatabase.add_stream(sg)
+    DebugDatabase.set_stream_offset(sg, 0)  # Default to zero, can only set during file writing
+
+    def add_to_debug_db(npu_op: NpuOperation, offset: int):
+        """Adds info to the debug database"""
+        if not is_dma_op(npu_op):
+            cmd = npu_op_to_cmd[npu_op]
+            DebugDatabase.add_command(stream_id, offset, cmd.ps.primary_op)
+
+    sg.register_command_stream = generate_command_stream(npu_op_list, arch, verbose, add_to_debug_db)
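
The new generate_register_command_stream_for_sg works on one subgraph at a
time. As a minimal illustrative sketch only (not part of the patch): a driver
loop that invokes it for every subgraph, assuming nng exposes its subgraphs as
a list as elsewhere in Vela; the wrapper function name is hypothetical:

    from ethosu.vela.high_level_command_to_npu_op import generate_register_command_stream_for_sg

    def generate_all_register_command_streams(nng, arch, verbose=False):
        # Hypothetical driver: attach a register command stream to every
        # subgraph, using the per-subgraph entry point introduced above.
        for sg in nng.subgraphs:
            generate_register_command_stream_for_sg(nng, sg, arch, verbose=verbose)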