From d2665804871d76a16d5962952ba95500e3977c56 Mon Sep 17 00:00:00 2001 From: Louis Verhaard Date: Fri, 20 Nov 2020 13:08:55 +0100 Subject: MLBEDSW-3562: Improve blockdep calculation Blockdep calculation can now handle different sized IFM/OFM. Change-Id: I898a3c1c3a6778916802f3dbfa658328e5093096 Signed-off-by: Louis Verhaard --- .../vela/test/test_register_command_generator.py | 102 ++++++++++++++++++++- 1 file changed, 101 insertions(+), 1 deletion(-) (limited to 'ethosu/vela/test/test_register_command_generator.py') diff --git a/ethosu/vela/test/test_register_command_generator.py b/ethosu/vela/test/test_register_command_generator.py index f2a16097..2760c860 100644 --- a/ethosu/vela/test/test_register_command_generator.py +++ b/ethosu/vela/test/test_register_command_generator.py @@ -17,13 +17,24 @@ # Description: # Contains unit tests for register command stream generator from ethosu.vela.api import NpuAddressRange +from ethosu.vela.api import NpuBlockTraversal +from ethosu.vela.api import NpuConv2DOperation +from ethosu.vela.api import NpuConvDepthWiseOperation from ethosu.vela.api import NpuDataType +from ethosu.vela.api import NpuElementWiseOp +from ethosu.vela.api import NpuElementWiseOperation from ethosu.vela.api import NpuFeatureMap +from ethosu.vela.api import NpuKernel from ethosu.vela.api import NpuLayout +from ethosu.vela.api import NpuPadding from ethosu.vela.api import NpuShape3D from ethosu.vela.api import NpuTileBox +from ethosu.vela.architecture_features import Accelerator +from ethosu.vela.architecture_features import create_default_arch +from ethosu.vela.register_command_stream_generator import calc_blockdep from ethosu.vela.register_command_stream_generator import get_address_ranges from ethosu.vela.register_command_stream_generator import get_strides +from ethosu.vela.test.extapi.test_extapi_generate_commands import create_feature_map def test_get_fm_strides(): @@ -39,6 +50,11 @@ def test_get_fm_strides(): assert get_strides(fm) == NpuShape3D(height=240, width=24, depth=1) +# ------------------------------------------------------------------- +# ADDRESS TESTS +# ------------------------------------------------------------------- + + def test_get_address_ranges_one_tile(): """Tests calculation of feature map address ranges, with 1 tile used""" fm = NpuFeatureMap() @@ -100,5 +116,89 @@ def test_get_address_ranges_4_tiles(): NpuAddressRange(region=6, address=16, length=18952), NpuAddressRange(region=6, address=32000, length=6280), NpuAddressRange(region=6, address=8000, length=12552), - NpuAddressRange(region=6, address=28800, length=12680), + NpuAddressRange(region=6, address=16000, length=25480), ] + + +# ------------------------------------------------------------------- +# BLOCKDEP TESTS +# ------------------------------------------------------------------- + + +def test_calc_blockdep0(): + """ + Tests blockdep calculation, op1 that produces op2's IFM2. + op2 takes 1 block to complete, which results in blockdep 0 + """ + op1 = NpuElementWiseOperation(NpuElementWiseOp.CLZ) + op1.ifm = create_feature_map(NpuShape3D(height=1, width=1, depth=1), 1, 0x60, layout=NpuLayout.NHCWB16,) + intermediate_fm = create_feature_map(NpuShape3D(height=1, width=1, depth=1), 1, 0xA0, layout=NpuLayout.NHCWB16,) + op1.ofm = intermediate_fm + op1.block_config = NpuShape3D(height=1, width=1, depth=4) + op2 = NpuElementWiseOperation(NpuElementWiseOp.SUB) + op2.ifm = create_feature_map(NpuShape3D(height=1, width=1, depth=1), 1, 0x39AC0, layout=NpuLayout.NHCWB16,) + op2.ifm2 = intermediate_fm + op2.ofm = create_feature_map(NpuShape3D(height=1, width=1, depth=1), 1, 0xE0, layout=NpuLayout.NHCWB16,) + op2.block_config = NpuShape3D(height=1, width=1, depth=4) + arch = create_default_arch(Accelerator.Ethos_U55_128) + block_dep = calc_blockdep(arch, op1, op2) + assert block_dep == 0 + + +def test_calc_blockdep2(): + """ + Tests blockdep calculation, op1 produces part of the input of op2, + op1 and op2 have different sizes. + op2 takes 3 blocks to complete, op1's last block collides with op2's last block + which results in blockdep 2 + """ + op1 = NpuConv2DOperation() + op1.ifm = create_feature_map(NpuShape3D(height=4, width=48, depth=8), 1, 0x4C80, layout=NpuLayout.NHCWB16,) + op1.ofm = create_feature_map(NpuShape3D(height=4, width=48, depth=16), 1, 0x6480, layout=NpuLayout.NHCWB16,) + op1.kernel = NpuKernel(1, 1) + op1.weights = [NpuAddressRange(region=1, address=0x4AE0, length=208)] + op1.biases = [NpuAddressRange(region=1, address=0x49A0, length=160)] + op1.padding = NpuPadding(top=0, left=0, right=0, bottom=0) + op1.block_traversal = NpuBlockTraversal.PART_KERNEL_FIRST + op1.block_config = NpuShape3D(height=4, width=6, depth=16) + op2 = NpuConvDepthWiseOperation() + op2.ifm = create_feature_map(NpuShape3D(height=3, width=48, depth=16), 1, 0, layout=NpuLayout.NHCWB16,) + # op2 has two tiles, the lower tile is produced by op1 + op2.ifm.tiles = NpuTileBox(height_0=2, height_1=2, width_0=48, addresses=[0x7680, 0, 0x6480, 0]) + op2.ofm = create_feature_map(NpuShape3D(height=1, width=24, depth=16), 1, 0x6480, layout=NpuLayout.NHCWB16,) + op2.kernel = NpuKernel(3, 3, stride_x=2, stride_y=2) + op2.weights = [NpuAddressRange(region=1, address=0x4BB0, length=208)] + op2.biases = [NpuAddressRange(region=1, address=0x4A40, length=160)] + op2.padding = NpuPadding(top=0, left=0, right=0, bottom=0) + op2.block_config = NpuShape3D(height=1, width=8, depth=16) + arch = create_default_arch(Accelerator.Ethos_U55_128) + block_dep = calc_blockdep(arch, op1, op2) + assert block_dep == 2 + + +def test_calc_blockdep3(): + """ + Tests blockdep calculation, op2 consumes part of op1, op1 and op2 have different sizes. + There is no overlap between the last blocks of op1 and the first jobs of op2, + which results in blockdep 3 + """ + op1 = NpuConv2DOperation() + op1.ifm = create_feature_map(NpuShape3D(height=13, width=96, depth=1), 1, 0, layout=NpuLayout.NHWC,) + op1.ofm = create_feature_map(NpuShape3D(height=6, width=48, depth=8), 1, 0x7C80, layout=NpuLayout.NHCWB16,) + op1.kernel = NpuKernel(3, 3, stride_x=2, stride_y=2) + op1.weights = [NpuAddressRange(region=1, address=0x4AE0, length=144)] + op1.biases = [NpuAddressRange(region=1, address=0x49A0, length=80)] + op1.padding = NpuPadding(top=0, left=0, right=1, bottom=0) + op1.block_traversal = NpuBlockTraversal.PART_KERNEL_FIRST + op1.block_config = NpuShape3D(height=6, width=3, depth=8) + op2 = NpuConvDepthWiseOperation() + op2.ifm = create_feature_map(NpuShape3D(height=5, width=48, depth=8), 1, 0x7C80, layout=NpuLayout.NHCWB16,) + op2.ofm = create_feature_map(NpuShape3D(height=4, width=48, depth=8), 1, 0x4C80, layout=NpuLayout.NHCWB16,) + op2.kernel = NpuKernel(3, 3) + op2.weights = [NpuAddressRange(region=1, address=0x4BB0, length=112)] + op2.biases = [NpuAddressRange(region=1, address=0x4A40, length=80)] + op2.padding = NpuPadding(top=0, left=0, right=0, bottom=0) + op2.block_config = NpuShape3D(height=4, width=6, depth=8) + arch = create_default_arch(Accelerator.Ethos_U55_128) + block_dep = calc_blockdep(arch, op1, op2) + assert block_dep == 3 -- cgit v1.2.1