path: root/ethosu/vela/high_level_command_to_npu_op.py
author    Tim Hall <tim.hall@arm.com>    2021-06-08 21:25:57 +0100
committer Tim Hall <tim.hall@arm.com>    2021-06-08 21:25:57 +0100
commit    d784af7e8995a10fb403157af48371699c35bbfe (patch)
tree      bf40b35b030d560049cef9411293b51e3d70ff4a /ethosu/vela/high_level_command_to_npu_op.py
parent    225e19d3640288e991475ee4c49cb3ffd83cc83b (diff)
MLBEDSW-4602: Fix Deepspeech scale & bias reuse issue.
- Deepspeech reuses identical weights and biases throughout the network.
  Since biases are now interleaved with weights there is a scaling issue
  when the ifm scales differ between operations using the same weight and
  scale tensor.
- This commit uses interleaved weights/scales on their first use but
  separates scales to source memory on subsequent use (if the ifm scale
  is different).

Signed-off-by: Tim Hall <tim.hall@arm.com>
Change-Id: I7aae163438160a919cae04e235966e75355a6148
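For reference, a minimal sketch (not the Vela source) of the scale-range selection this commit introduces in create_weights(): scales are read from the shared interleaved weight tensor when no standalone scale tensor exists, and from the standalone scale tensor's own (source) memory when it does. NpuAddressRange, get_region, encoded_ranges and round_up mirror names visible in the diff below; the stand-in types and the select_scale_range helper are illustrative only.

    from typing import NamedTuple

    class NpuAddressRange(NamedTuple):
        # Illustrative stand-in for the Vela type of the same name
        region: int
        address: int
        length: int

    def round_up(n: int, multiple: int) -> int:
        # Stand-in for Vela's round_up helper
        return -(-n // multiple) * multiple

    def select_scale_range(shared_region, scale_region, address,
                           weight_range, scale_tensor, key):
        """Pick the address range for per-channel scales, per the diff below."""
        if scale_tensor is not None:
            # Standalone scale tensor: read scales from source memory,
            # using its encoded range for this key
            assert scale_tensor.src_tensor is None
            scale_range = scale_tensor.encoded_ranges[key]
            scale_address = scale_tensor.address + scale_range.offset
            return NpuAddressRange(scale_region, int(scale_address),
                                   round_up(int(scale_range.scale_bytes), 16))
        # Interleaved case: the scale bytes sit at the base of the shared
        # encoded range (the weights start at weight_offset)
        return NpuAddressRange(shared_region, int(address),
                               round_up(int(weight_range.scale_bytes), 16))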
Diffstat (limited to 'ethosu/vela/high_level_command_to_npu_op.py')
-rw-r--r--  ethosu/vela/high_level_command_to_npu_op.py  23
1 file changed, 17 insertions, 6 deletions
diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py
index 4ef7bee8..80d0e476 100644
--- a/ethosu/vela/high_level_command_to_npu_op.py
+++ b/ethosu/vela/high_level_command_to_npu_op.py
@@ -267,11 +267,14 @@ def create_feature_map(tens: Tensor, box: Box, arch: ArchitectureFeatures, op_sh
return fm
-def create_weights(weight_tensor: Tensor, weight_box: Box, arch: ArchitectureFeatures) -> List[NpuAddressRange]:
+def create_weights(
+ weight_tensor: Tensor, weight_box: Box, scale_tensor: Tensor, arch: ArchitectureFeatures
+) -> List[NpuAddressRange]:
"""Returns address ranges for weights and scales"""
weights = []
biases = []
- region = get_region(weight_tensor.mem_type, arch)
+ shared_region = get_region(weight_tensor.mem_type, arch)
+ scale_region = scale_tensor and get_region(scale_tensor.mem_type, arch)
w_tensor_src = weight_tensor
if weight_tensor.src_tensor:
@@ -300,11 +303,19 @@ def create_weights(weight_tensor: Tensor, weight_box: Box, arch: ArchitectureFea
# Location of weights in tensor
addr_range = NpuAddressRange(
- region, int(address + weight_range.weight_offset), round_up(int(weight_range.weight_bytes), 16)
+ shared_region, int(address + weight_range.weight_offset), round_up(int(weight_range.weight_bytes), 16)
)
weights.append(addr_range)
- # Location of biases in tensor
- addr_range = NpuAddressRange(region, int(address), round_up(int(weight_range.scale_bytes), 16))
+
+ # Location of standalone scales or combined weights tensor scales
+ if scale_tensor:
+ assert scale_tensor.src_tensor is None # Must be standalone
+ scale_range = scale_tensor.encoded_ranges[key]
+ address = scale_tensor.address + scale_range.offset
+ addr_range = NpuAddressRange(scale_region, int(address), round_up(int(scale_range.scale_bytes), 16))
+ else:
+ addr_range = NpuAddressRange(shared_region, int(address), round_up(int(weight_range.scale_bytes), 16))
+
biases.append(addr_range)
return weights, biases
@@ -351,7 +362,7 @@ def set_common_op_fields(npu_op: NpuBlockOperation, cmd: NpuStripe, arch: Archit
npu_op.ofm.quantization = get_ofm_quantization(ps, cmd.ofm_tensor)
if cmd.weight_tensor is not None:
- npu_op.weights, npu_op.biases = create_weights(cmd.weight_tensor, cmd.weight_box, arch)
+ npu_op.weights, npu_op.biases = create_weights(cmd.weight_tensor, cmd.weight_box, cmd.scale_tensor, arch)
npu_op.activation = create_npu_activation(op)
npu_op.fused_quantize = any(op.type == Op.Quantize for op in ps.ops)
npu_op.rounding_mode = get_rounding_mode(op, npu_op.fused_quantize)
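Why the ifm scale matters here: in the standard quantised-convolution rescale, the per-channel factor baked into the encoded scales folds in the ifm scale, so two operations can share identical weight bytes yet need different encoded scale streams. A hypothetical illustration (formula is the standard rescale; values are invented):

    # Hypothetical illustration of the reuse hazard this commit fixes: the
    # rescale factor encoded alongside the weights depends on the ifm scale.
    def rescale_factor(ifm_scale: float, weight_scale: float, ofm_scale: float) -> float:
        # Standard quantised-conv output rescale: real = (ifm * weight) / ofm
        return ifm_scale * weight_scale / ofm_scale

    # Two ops reuse the same weights (same weight_scale) but have different
    # ifm scales, so their encoded scales must differ.
    op_a = rescale_factor(ifm_scale=0.05, weight_scale=0.1, ofm_scale=0.2)
    op_b = rescale_factor(ifm_scale=0.08, weight_scale=0.1, ofm_scale=0.2)
    assert op_a != op_b  # sharing one interleaved scale stream would be wrong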