about | summary | refs | log | tree | commit | diff
path: root/ethosu/vela/high_level_command_to_npu_op.py
diff options
context:
space:
mode:
Diffstat (limited to 'ethosu/vela/high_level_command_to_npu_op.py')
-rw-r--r-- ethosu/vela/high_level_command_to_npu_op.py | 55
1 files changed, 33 insertions, 22 deletions
diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py
index efd8a03d..7db4931d 100644
--- a/ethosu/vela/high_level_command_to_npu_op.py
+++ b/ethosu/vela/high_level_command_to_npu_op.py
@@ -32,7 +32,6 @@ from .api import NpuDmaOperation
from .api import NpuElementWiseOp
from .api import NpuElementWiseOperation
from .api import NpuFeatureMap
-from .api import NpuKernel
from .api import NpuLayout
from .api import NpuOperation
from .api import NpuPadding
@@ -46,15 +45,20 @@ from .api import NpuTileBox
from .architecture_features import ArchitectureFeatures
from .architecture_features import Block
from .data_type import DataType
+from .debug_database import DebugDatabase
from .high_level_command_stream import Box
from .high_level_command_stream import Command
from .high_level_command_stream import CommandType
from .high_level_command_stream import DMA
from .high_level_command_stream import NpuStripe
-from .operation import Kernel
from .operation import NpuBlockType
from .operation import Op
from .operation import Operation
+from .register_command_stream_generator import generate_command_stream
+from .register_command_stream_util import BASE_PTR_INDEX_MEM2MEM
+from .register_command_stream_util import is_dma_op
+from .register_command_stream_util import to_npu_kernel
+from .register_command_stream_util import UNARY_ELEMWISE_OPS
from .tensor import MemType
from .tensor import Tensor
from .tensor import TensorBlockTraversal
@@ -62,14 +66,10 @@ from .tensor import TensorFormat
from .tensor import TensorPurpose
-unary_elementwise_ops = set((NpuElementWiseOp.ABS, NpuElementWiseOp.LRELU, NpuElementWiseOp.CLZ,))
-
-
class BasePointerIndex(IntEnum):
WeightTensor = 0 # base address index for the Weight tensor
ScratchTensor = 1 # base address index for the Scratch_tensor in the TensorArena
ScratchFastTensor = 2 # base address for the Scratch_fast_tensor
- Mem2Mem = (1 << 8) | (3 << 0) # base address slot for memory 2 memory transfer
dtype_map = {
@@ -102,20 +102,6 @@ elementwise_op_map = {
}
-def to_npu_kernel(kernel: Kernel) -> NpuKernel:
- """Converts the given internally used kernel object to NpuKernel (of public API)"""
- return NpuKernel(
- kernel.width, kernel.height, kernel.stride.x, kernel.stride.y, kernel.dilation.x, kernel.dilation.y
- )
-
-
-def to_kernel(kernel: Optional[NpuKernel]) -> Kernel:
- """Converts the given public API object to Kernel (used internally)"""
- if kernel is None:
- return Kernel(1, 1)
- return Kernel(kernel.width, kernel.height, kernel.stride_x, kernel.stride_y, kernel.dilation_x, kernel.dilation_y)
-
-
def ifm_ifm2_correct_order(ifm_shape: List[int], ifm2_shape: List[int]) -> bool:
if ifm_shape == []:
# Scalar needs to be in IFM2
@@ -412,7 +398,7 @@ def create_npu_elementwise_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> Npu
assert op.type in elementwise_op_map, f"Unknown elementwise type {op.type}"
elemwise_op = elementwise_op_map[op.type]
npu_op = NpuElementWiseOperation(elemwise_op)
- if elemwise_op not in unary_elementwise_ops:
+ if elemwise_op not in UNARY_ELEMWISE_OPS:
if not ifm_ifm2_correct_order(cmd.ifm_tensor.shape, cmd.ifm2_tensor.shape):
# The scalar/broadcasted feature map has to be the ifm2 tensor so switch the ifms
cmd.ifm_tensor, cmd.ifm2_tensor = cmd.ifm2_tensor, cmd.ifm_tensor
@@ -452,7 +438,7 @@ def create_dma_op(cmd: DMA, arch: ArchitectureFeatures) -> NpuDmaOperation:
"""Converts the command to NpuDmaOperation"""
src_region = get_region(cmd.in_tensor, arch)
if cmd.out_tensor.purpose == TensorPurpose.LUT:
- dest_region = BasePointerIndex.Mem2Mem
+ dest_region = BASE_PTR_INDEX_MEM2MEM
else:
dest_region = get_region(cmd.out_tensor, arch)
@@ -492,3 +478,28 @@ def convert_command_to_npu_op(cmd: Command, arch: ArchitectureFeatures) -> NpuOp
# add a link to the high level command for debugging purposes
npu_op.cmd = cmd
return npu_op
+
+
+def generate_register_command_stream_for_sg(nng, sg, arch, verbose=False):
+ """Generates command stream for the subgraph, adds it to sg.register_command_stream"""
+ # Convert high level command stream to list of NpuOperation
+ npu_op_list = []
+ npu_op_to_cmd = dict() # map from npu op to high level command
+ for cmd in sg.high_level_command_stream:
+ if cmd.cmdtype == CommandType.NpuStripe and cmd.ps.npu_block_type == NpuBlockType.Default:
+ print("Warning: Skipping register command stream generation for", cmd.ps)
+ else:
+ npu_op = convert_command_to_npu_op(cmd, arch)
+ npu_op_list.append(npu_op)
+ npu_op_to_cmd[npu_op] = cmd
+ # Generate register commands
+ stream_id = DebugDatabase.add_stream(sg)
+ DebugDatabase.set_stream_offset(sg, 0) # Default to zero, can only set during file writing
+
+ def add_to_debug_db(npu_op: NpuOperation, offset: int):
+ """Adds info to the debug database"""
+ if not is_dma_op(npu_op):
+ cmd = npu_op_to_cmd[npu_op]
+ DebugDatabase.add_command(stream_id, offset, cmd.ps.primary_op)
+
+ sg.register_command_stream = generate_command_stream(npu_op_list, arch, verbose, add_to_debug_db)