diff options
author | Jacob Bohlin <jacob.bohlin@arm.com> | 2020-08-10 15:21:42 +0200 |
---|---|---|
committer | Fredrik Knutsson <fredrik.knutsson.hunnebo@gmail.com> | 2020-08-12 12:41:34 +0000 |
commit | de2a57f81197f475a9c74b65b02ac3d5c1e949bb (patch) | |
tree | fd762db261ac63fd70f9c2b64f5ae4f25d2f9a1e /ethosu/vela/weight_compressor.py | |
parent | 8d3216f5618bd1a276616f7d7b9956a61abfa973 (diff) | |
download | ethos-u-vela-de2a57f81197f475a9c74b65b02ac3d5c1e949bb.tar.gz |
MLBEDSW-2684: Fix weight compression scale calculations for FC
Fixed an issue with Fully Connected weights' shape used for compression
scale calculations causing incorrect performance estimates.
Signed-off-by: Jacob Bohlin <jacob.bohlin@arm.com>
Change-Id: Id3a5c187ad3e942b8e3d4c690b3dbba3c6fda922
Diffstat (limited to 'ethosu/vela/weight_compressor.py')
-rw-r--r-- | ethosu/vela/weight_compressor.py | 11 |
1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py index fa7d2d11..7e03a94d 100644 --- a/ethosu/vela/weight_compressor.py +++ b/ethosu/vela/weight_compressor.py @@ -295,9 +295,6 @@ def compress_weights(arch, nng, tens, npu_block_type, ofm_block_depth, ofm_depth if len(weights.shape) == 2: weights = np.expand_dims(np.expand_dims(weights, axis=0), axis=0) - weights_shape = (weights.shape[0], 1, 1, weights.shape[1]) - else: - weights_shape = weights.shape compression_scales = [] compressed_offsets = [] @@ -312,9 +309,9 @@ def compress_weights(arch, nng, tens, npu_block_type, ofm_block_depth, ofm_depth tens.block_traversal = TensorBlockTraversal.DepthWise if npu_block_type == NpuBlockType.ConvolutionMxN: # Determine which block traversal strategy has better DPU utilization - kernel_size = weights_shape[0] * weights_shape[1] - depth_utilization = weights_shape[2] / round_up(weights_shape[2], 32 if ifm_bitdepth == 8 else 16) - part_kernel_utilization = (weights_shape[2] / round_up(weights_shape[2], 8)) * ( + kernel_size = weights.shape[0] * weights.shape[1] + depth_utilization = weights.shape[2] / round_up(weights.shape[2], 32 if ifm_bitdepth == 8 else 16) + part_kernel_utilization = (weights.shape[2] / round_up(weights.shape[2], 8)) * ( kernel_size / round_up(kernel_size, 4 if ifm_bitdepth == 8 else 2) ) if part_kernel_utilization >= depth_utilization or ifm_depth <= 8: @@ -331,7 +328,7 @@ def compress_weights(arch, nng, tens, npu_block_type, ofm_block_depth, ofm_depth weights = np.flip(weights, axis=(0, 1)) # Calculate brick size - brick_size = (weights_shape[0], weights_shape[1], weights_shape[2], min(tens.shape[-1], ofm_depth_step)) + brick_size = (weights.shape[0], weights.shape[1], weights.shape[2], min(tens.shape[-1], ofm_depth_step)) elements_in_brick = np.prod(brick_size) # Slice weight stream up depth-ways into bricks and compress |