aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Louis Verhaard <louis.verhaard@arm.com> 2020-05-12 14:22:38 +0200
committer: Tim Hall <tim.hall@arm.com> 2020-06-18 17:53:52 +0100
commit: f98c674377f2d073bb719a80f3cbc6aab73acd32 (patch)
tree: 53788d903b474b3dfe92873d21db657abb61dadb
parent: c4cbbc956e8405123e3c6a3df71e67ef492764d7 (diff)
download: ethos-u-vela-f98c674377f2d073bb719a80f3cbc6aab73acd32.tar.gz
MLBEDSW-1540: bug fix SHRAM buffer size calculation
Updated the algorithm for SHRAM buffer size calculation with block depth alignment.

Change-Id: Ie8b10725bb9f52ba4a353b5a2170653833e6e5c0
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
-rw-r--r-- ethosu/vela/architecture_features.py 12
1 files changed, 10 insertions, 2 deletions
diff --git a/ethosu/vela/architecture_features.py b/ethosu/vela/architecture_features.py
index c8827db6..c712588f 100644
--- a/ethosu/vela/architecture_features.py
+++ b/ethosu/vela/architecture_features.py
@@ -96,6 +96,9 @@ class SHRAMElements:
Acc40 = 6
Last = Acc40
BitSizes = np.array([8, 16, 8, 16, 16, 32, 40], np.int32)
+ ByteSizes = BitSizes // 8
+ PostAlign = np.array([8, 8, 8, 8, 1, 1, 1], np.int32)
+ PreAlign = np.array([1, 1, 1, 1, 8, 8, 8], np.int32)
class SHRAMBlockConfig:
@@ -301,8 +304,13 @@ Note the difference between ArchitectureFeatures and CompilerOptions
# accumulator sizes. Consumers will need to select their preferred
# operation and bit-width at read-time.
def generate_block_config(self, width, height, depth):
- # Number of bytes required for any SRAM element for a FM of given dimensions
- size_bytes = (SHRAMElements.BitSizes * (height * width * depth)) // 8
+ # Number of bytes required for any SHRAM element for a FM of given dimensions.
+ # For IFM: size = H*W*Align(D*BYTE_WIDTH, 8)
+ # For ACC: size = H*W*Align(D,8)*BYTE_WIDTH
+ d1 = round_up(depth, SHRAMElements.PreAlign)
+ d2 = round_up(d1 * SHRAMElements.ByteSizes, SHRAMElements.PostAlign)
+ size_bytes = (height * width) * d2
+
# Convert byte size (rounded) to size in banks
size_banks = round_up_divide(size_bytes, self.shram_bank_size)
size_banks *= 2 # Double buffer the IFM/Acc (need twice as many banks)