aboutsummaryrefslogtreecommitdiff
path: root/ethosu/vela/architecture_features.py
diff options
context:
space:
mode:
authorFredrik Svedberg <fredrik.svedberg@arm.com>2020-06-03 15:43:31 +0200
committerLouis Verhaard <louis.verhaard@arm.com>2020-08-05 16:26:04 +0200
commita0c3624899edc601525a589643c802469003f89d (patch)
tree4fc52db04cd29901b3e5d4a7425a7a641e9647fb /ethosu/vela/architecture_features.py
parent9a03fdff316662be69a1adc4e391e43bc6519b08 (diff)
downloadethos-u-vela-a0c3624899edc601525a589643c802469003f89d.tar.gz
[MLBEDSW-2335] SoftMax int16
Added graph rewrite of Softmax for int16. Change-Id: Id7885af6056a23e8b8362fb61ae94283251eb398 Signed-off-by: Fredrik Svedberg <fredrik.svedberg@arm.com>
Diffstat (limited to 'ethosu/vela/architecture_features.py')
-rw-r--r--ethosu/vela/architecture_features.py36
1 files changed, 21 insertions, 15 deletions
diff --git a/ethosu/vela/architecture_features.py b/ethosu/vela/architecture_features.py
index 43b32109..822bc115 100644
--- a/ethosu/vela/architecture_features.py
+++ b/ethosu/vela/architecture_features.py
@@ -94,14 +94,15 @@ class SHRAMElements:
IFM16 = 1
IFM8_Elementwise = 2
IFM16_Elementwise = 3
- Acc16 = 4
- Acc32 = 5
- Acc40 = 6
+ IFM32_Elementwise = 4
+ Acc16 = 5
+ Acc32 = 6
+ Acc40 = 7
Last = Acc40
- BitSizes = np.array([8, 16, 8, 16, 16, 32, 40], np.int32)
+ BitSizes = np.array([8, 16, 8, 16, 32, 16, 32, 40], np.int32)
ByteSizes = BitSizes // 8
- PostAlign = np.array([8, 8, 8, 8, 1, 1, 1], np.int32)
- PreAlign = np.array([1, 1, 1, 1, 8, 8, 8], np.int32)
+ PostAlign = np.array([8, 8, 8, 8, 8, 1, 1, 1], np.int32)
+ PreAlign = np.array([1, 1, 1, 1, 1, 8, 8, 8], np.int32)
class SHRAMBlockConfig:
@@ -150,22 +151,22 @@ Note the difference between ArchitectureFeatures and CompilerOptions
)
accelerator_configs = {
Accelerator.Yoda_512: ArchitectureConfig(
- 256, 2, Block(2, 2, 8), Block(2, 2, 8), 48, [8, 8, 8, 8, 8, 16, 20], 8
+ 256, 2, Block(2, 2, 8), Block(2, 2, 8), 48, [8, 8, 8, 8, 16, 8, 16, 20], 8
),
Accelerator.Yoda_256: ArchitectureConfig(
- 256, 1, Block(2, 2, 8), Block(2, 2, 8), 48, [8, 8, 8, 8, 8, 16, 20], 8
+ 256, 1, Block(2, 2, 8), Block(2, 2, 8), 48, [8, 8, 8, 8, 16, 8, 16, 20], 8
),
Accelerator.Ethos_U55_256: ArchitectureConfig(
- 256, 1, Block(2, 2, 8), Block(2, 2, 8), 48, [8, 8, 8, 8, 8, 16, 20], 8
+ 256, 1, Block(2, 2, 8), Block(2, 2, 8), 48, [8, 8, 8, 8, 16, 8, 16, 20], 8
),
Accelerator.Ethos_U55_128: ArchitectureConfig(
- 128, 1, Block(2, 1, 8), Block(2, 2, 8), 24, [4, 4, 4, 4, 4, 8, 12], 4
+ 128, 1, Block(2, 1, 8), Block(2, 2, 8), 24, [4, 4, 4, 4, 8, 4, 8, 12], 4
),
Accelerator.Ethos_U55_64: ArchitectureConfig(
- 64, 1, Block(1, 1, 8), Block(1, 1, 8), 16, [2, 2, 2, 2, 4, 4, 8], 2
+ 64, 1, Block(1, 1, 8), Block(1, 1, 8), 16, [2, 2, 2, 2, 4, 4, 4, 8], 2
),
Accelerator.Ethos_U55_32: ArchitectureConfig(
- 32, 1, Block(1, 1, 4), Block(1, 1, 8), 16, [2, 2, 2, 2, 4, 4, 4], 1
+ 32, 1, Block(1, 1, 4), Block(1, 1, 8), 16, [2, 2, 2, 2, 4, 4, 4, 4], 1
),
}
@@ -182,6 +183,7 @@ Note the difference between ArchitectureFeatures and CompilerOptions
block_config_limit,
global_memory_clock_scale,
max_blockdep,
+ softmax_support,
):
accelerator_config = accelerator_config.lower()
self.vela_config = vela_config
@@ -262,11 +264,13 @@ Note the difference between ArchitectureFeatures and CompilerOptions
TensorPurpose.Unknown: MemArea.Unknown,
TensorPurpose.Weights: self.permanent_storage_mem_area,
TensorPurpose.FeatureMap: self.feature_map_storage_mem_area,
+ TensorPurpose.LUT: self.permanent_storage_mem_area,
}
self.tensor_storage_mem_type = {
TensorPurpose.Weights: MemType.Permanent_NPU,
TensorPurpose.FeatureMap: MemType.Scratch,
+ TensorPurpose.LUT: MemType.Scratch,
}
self.min_block_sizes = {
@@ -276,6 +280,7 @@ Note the difference between ArchitectureFeatures and CompilerOptions
NpuBlockType.Pooling: (dpu_min_height, dpu_min_width),
NpuBlockType.ConvolutionDepthWise: (dpu_min_height, dpu_min_width),
NpuBlockType.ElementWise: (1, 1),
+ NpuBlockType.ReduceSum: (dpu_min_height, dpu_min_width),
}
self.sub_kernel_limits = {
@@ -285,6 +290,7 @@ Note the difference between ArchitectureFeatures and CompilerOptions
NpuBlockType.Pooling: (8, 8),
NpuBlockType.ConvolutionDepthWise: (8, 8),
NpuBlockType.ElementWise: (1, 1),
+ NpuBlockType.ReduceSum: (8, 8),
}
# weights for scheduler search
@@ -317,7 +323,7 @@ Note the difference between ArchitectureFeatures and CompilerOptions
self.generate_block_config_map(Block(ifm_block_max.width, ifm_block_max.height, 128))
# Setup supported operators and restriction checkers class
- self.supported_operators = SupportedOperators()
+ self.supported_operators = SupportedOperators(softmax_support)
# Calculate block configuration for ALL known IFM operations and
# accumulator sizes. Consumers will need to select their preferred
@@ -358,10 +364,10 @@ Note the difference between ArchitectureFeatures and CompilerOptions
self.block_config_map[key] = self.generate_block_config(w, h, c)
def calc_ifm_block_depth(self, ifm_depth, ifm_bits):
- assert ifm_bits == 8 or ifm_bits == 16
+ assert ifm_bits in (8, 16, 32)
assert ifm_depth > 0
ifm_depth = round_up(ifm_depth, self.ifm_ublock.depth)
- max_block_depth = 32 if ifm_bits == 8 else 16
+ max_block_depth = 8 * 32 // ifm_bits
return min(max_block_depth, ifm_depth)
# Calculate the size of the IFM block given a depth, target OFM block and a kernel