aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author erik.andersson@arm.com <erik.andersson@arm.com> 2021-04-14 13:31:05 +0200
committer erik.andersson@arm.com <erik.andersson@arm.com> 2021-04-14 14:11:42 +0200
commit 1d6d5c47c2000facc377620a64084738339ccda9 (patch)
tree e1ec489f9c2270dc1788b890a5d317004da6ee13
parent 1c772e43434bd39aad1cb2e0036dcdf67b038633 (diff)
download ethos-u-vela-1d6d5c47c2000facc377620a64084738339ccda9.tar.gz
MLBEDSW-4103: Block config calc update
The previous calculation of the IFM block height and width yielded incorrect block configs when running transpose_conv networks with certain hardware constraints.

Signed-off-by: erik.andersson@arm.com <erik.andersson@arm.com>
Change-Id: I8b6936a3e8c37da640bdeac84ecfea8363f910f9
-rw-r--r-- ethosu/vela/architecture_features.py 24
1 files changed, 8 insertions, 16 deletions
diff --git a/ethosu/vela/architecture_features.py b/ethosu/vela/architecture_features.py
index c6ca9cdb..43e82655 100644
--- a/ethosu/vela/architecture_features.py
+++ b/ethosu/vela/architecture_features.py
@@ -28,6 +28,7 @@ from .ethos_u55_regs.ethos_u55_regs import resampling_mode
from .numeric_util import full_shape
from .numeric_util import round_up
from .numeric_util import round_up_divide
+from .numeric_util import round_up_to_int
from .operation import Kernel
from .operation import NpuBlockType
from .operation import PointXYZ
@@ -426,27 +427,18 @@ class ArchitectureFeatures:
ifm_resampling_mode=resampling_mode.NONE,
):
upscaling = 1 if ifm_resampling_mode == resampling_mode.NONE else 2
+
# Height
- ifm_odd_2x_height_enable = 0
dilated_kernel_height = ((kernel.height - 1) * kernel.dilation.y) + 1
- ifm_block_height = (
- (ofm_block.height - 1) * kernel.stride.y
- + min(subkernel.height, dilated_kernel_height)
- + ifm_odd_2x_height_enable
- ) // upscaling
-
- ifm_block_height = round_up(ifm_block_height, self.ofm_ublock.height)
+ ifm_block_height = round_up_to_int(
+ ((ofm_block.height - 1) * kernel.stride.y + min(subkernel.height, dilated_kernel_height)) / upscaling
+ )
# Width
- ifm_odd_2x_width_enable = 0
dilated_kernel_width = ((kernel.width - 1) * kernel.dilation.x) + 1
- ifm_block_width = (
- (ofm_block.width - 1) * kernel.stride.x
- + min(subkernel.width, dilated_kernel_width)
- + ifm_odd_2x_width_enable
- ) // upscaling
-
- ifm_block_width = round_up(ifm_block_width, self.ofm_ublock.width)
+ ifm_block_width = round_up_to_int(
+ ((ofm_block.width - 1) * kernel.stride.x + min(subkernel.width, dilated_kernel_width)) / upscaling
+ )
return Block(ifm_block_width, ifm_block_height, ifm_block_depth)