author     Jacob Bohlin <jacob.bohlin@arm.com>  2020-08-10 15:21:42 +0200
committer  Fredrik Knutsson <fredrik.knutsson.hunnebo@gmail.com>  2020-08-12 12:41:34 +0000
commit     de2a57f81197f475a9c74b65b02ac3d5c1e949bb (patch)
tree       fd762db261ac63fd70f9c2b64f5ae4f25d2f9a1e /ethosu/vela/weight_compressor.py
parent     8d3216f5618bd1a276616f7d7b9956a61abfa973 (diff)
download   ethos-u-vela-de2a57f81197f475a9c74b65b02ac3d5c1e949bb.tar.gz
MLBEDSW-2684: Fix weight compression scale calculations for FC
Fixed an issue with the Fully Connected weights' shape used for the
compression scale calculations, which caused incorrect performance
estimates.

Signed-off-by: Jacob Bohlin <jacob.bohlin@arm.com>
Change-Id: Id3a5c187ad3e942b8e3d4c690b3dbba3c6fda922
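The shape mix-up is easy to reproduce in isolation. A minimal sketch, with
made-up dimensions, of what the removed weights_shape tuple evaluated to for
Fully Connected weights (the tuple was built after the padding, as the diff
ordering below shows):

    import numpy as np

    # Fully Connected weights arrive as a 2-D matrix; illustrative size only.
    weights = np.zeros((1000, 2048))

    # The compressor pads 2-D weights to 4-D (unchanged context line in the diff).
    weights = np.expand_dims(np.expand_dims(weights, axis=0), axis=0)
    print(weights.shape)  # (1, 1, 1000, 2048)

    # The removed line indexed weights.shape *after* the padding, so both
    # indexed elements were the inserted singleton axes:
    weights_shape = (weights.shape[0], 1, 1, weights.shape[1])
    print(weights_shape)  # (1, 1, 1, 1) -- the real FC dimensions are lost

    # The fix deletes the tuple and indexes weights.shape directly, which is
    # already correct for both padded 2-D and native 4-D weights.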
Diffstat (limited to 'ethosu/vela/weight_compressor.py')
-rw-r--r--  ethosu/vela/weight_compressor.py | 11 ++++-------
1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py
index fa7d2d11..7e03a94d 100644
--- a/ethosu/vela/weight_compressor.py
+++ b/ethosu/vela/weight_compressor.py
@@ -295,9 +295,6 @@ def compress_weights(arch, nng, tens, npu_block_type, ofm_block_depth, ofm_depth
 
     if len(weights.shape) == 2:
         weights = np.expand_dims(np.expand_dims(weights, axis=0), axis=0)
-        weights_shape = (weights.shape[0], 1, 1, weights.shape[1])
-    else:
-        weights_shape = weights.shape
 
     compression_scales = []
     compressed_offsets = []
@@ -312,9 +309,9 @@ def compress_weights(arch, nng, tens, npu_block_type, ofm_block_depth, ofm_depth
         tens.block_traversal = TensorBlockTraversal.DepthWise
     if npu_block_type == NpuBlockType.ConvolutionMxN:
         # Determine which block traversal strategy has better DPU utilization
-        kernel_size = weights_shape[0] * weights_shape[1]
-        depth_utilization = weights_shape[2] / round_up(weights_shape[2], 32 if ifm_bitdepth == 8 else 16)
-        part_kernel_utilization = (weights_shape[2] / round_up(weights_shape[2], 8)) * (
+        kernel_size = weights.shape[0] * weights.shape[1]
+        depth_utilization = weights.shape[2] / round_up(weights.shape[2], 32 if ifm_bitdepth == 8 else 16)
+        part_kernel_utilization = (weights.shape[2] / round_up(weights.shape[2], 8)) * (
             kernel_size / round_up(kernel_size, 4 if ifm_bitdepth == 8 else 2)
         )
         if part_kernel_utilization >= depth_utilization or ifm_depth <= 8:
@@ -331,7 +328,7 @@ def compress_weights(arch, nng, tens, npu_block_type, ofm_block_depth, ofm_depth
         weights = np.flip(weights, axis=(0, 1))
 
     # Calculate brick size
-    brick_size = (weights_shape[0], weights_shape[1], weights_shape[2], min(tens.shape[-1], ofm_depth_step))
+    brick_size = (weights.shape[0], weights.shape[1], weights.shape[2], min(tens.shape[-1], ofm_depth_step))
     elements_in_brick = np.prod(brick_size)
 
     # Slice weight stream up depth-ways into bricks and compress
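As a worked illustration of the block-traversal heuristic the second hunk
touches: a standalone sketch with made-up kernel dimensions, where round_up is
reimplemented locally for self-containment and the traversal names are plain
strings rather than Vela's TensorBlockTraversal enum values.

    def round_up(n, multiple):
        # Round n up to the nearest multiple (mirrors Vela's round_up helper).
        return ((n + multiple - 1) // multiple) * multiple

    # Hypothetical layer: 3x3 convolution kernel, 64 input channels, 8-bit IFM.
    # Conv weights are laid out as (kernel_h, kernel_w, ifm_depth, ofm_depth).
    weights_shape = (3, 3, 64, 128)
    ifm_bitdepth = 8
    ifm_depth = weights_shape[2]

    kernel_size = weights_shape[0] * weights_shape[1]  # 3 * 3 = 9
    depth_utilization = weights_shape[2] / round_up(weights_shape[2], 32 if ifm_bitdepth == 8 else 16)
    # 64 / round_up(64, 32) = 64 / 64 = 1.0
    part_kernel_utilization = (weights_shape[2] / round_up(weights_shape[2], 8)) * (
        kernel_size / round_up(kernel_size, 4 if ifm_bitdepth == 8 else 2)
    )
    # (64 / 64) * (9 / 12) = 0.75

    if part_kernel_utilization >= depth_utilization or ifm_depth <= 8:
        traversal = "PartKernelFirst"
    else:
        traversal = "DepthFirst"  # selected here: 0.75 < 1.0 and ifm_depth > 8

    print(traversal)  # DepthFirst

The same shape indices feed brick_size in the third hunk, which is where the
collapsed (1, 1, 1, 1) tuple distorted the Fully Connected compression scales
and hence the performance estimates this commit corrects.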