about summary refs log tree commit diff
path: root/ethosu/vela/npu_serialisation.py
diff options
context:
space:
mode:
authorCharles Xu <charles.xu@arm.com>2020-05-13 10:15:26 +0200
committerTim Hall <tim.hall@arm.com>2020-06-18 17:53:52 +0100
commit78792223369fa34dacd0e69e189af035283da2ae (patch)
treeac3826df5528866319fd65d7a99eef8e87cd4084 /ethosu/vela/npu_serialisation.py
parent620d88c60482bad4d96da4d32cc4cca5561cca9e (diff)
downloadethos-u-vela-78792223369fa34dacd0e69e189af035283da2ae.tar.gz
Add elementwise vector scalars support
Write the constant scalars into flash. In case it's Dram or OffChipFlash, DMA the scalars from flash to sram. Signed-off-by: Charles Xu <charles.xu@arm.com> Change-Id: I42300a05dfe968d623b8aec8549644549e0f54b5
Diffstat (limited to 'ethosu/vela/npu_serialisation.py')
-rw-r--r--ethosu/vela/npu_serialisation.py30
1 file changed, 20 insertions, 10 deletions
diff --git a/ethosu/vela/npu_serialisation.py b/ethosu/vela/npu_serialisation.py
index b8ac20f3..0cb40ed0 100644
--- a/ethosu/vela/npu_serialisation.py
+++ b/ethosu/vela/npu_serialisation.py
@@ -46,6 +46,10 @@ def copy_compressed_values_to_memory_tensor(memory_tensor, src_tensor):
memory_tensor.values[start_addr:end_addr] = compressed_values
start_addr = end_addr
+def copy_ifm_values_to_memory_tensor(memory_tensor, src_tensor):
+ start_addr = src_tensor.address
+ end_addr = start_addr + src_tensor.quant_values.size
+ memory_tensor.values[start_addr:end_addr] = src_tensor.quant_values
def serialise_npu_subgraph_into_tensors(nng, sg, arch, scratch_tens, flash_tens):
if sg.placement != PassPlacement.Npu:
@@ -90,16 +94,22 @@ def serialise_npu_subgraph_into_tensors(nng, sg, arch, scratch_tens, flash_tens)
for cps in sg.cascaded_passes:
for ps in cps.passes:
- if ps.placement == PassPlacement.Npu and ps.weight_tensor is not None:
- # For DMA ops, ps.weight_tensor is referring to the SRAM weight tensor and therefore the address
- # is pointing at the destination address of where the weights should be placed in SRAM.
- # This ensures that the Flash weight tensor is used instead and thus gets the correct address.
- if ps.weight_tensor.ops[0].type == "DMA":
- copy_compressed_values_to_memory_tensor(sg.flash_tensor, ps.weight_tensor.ops[0].inputs[0])
- else:
- copy_compressed_values_to_memory_tensor(sg.flash_tensor, ps.weight_tensor)
-
- copy_compressed_values_to_memory_tensor(sg.flash_tensor, ps.scale_tensor)
+ if ps.placement == PassPlacement.Npu:
+ if ps.weight_tensor != None:
+ # For DMA ops, ps.weight_tensor is referring to the SRAM weight tensor and therefore the address
+ # is pointing at the destination address of where the weights should be placed in SRAM.
+ # This ensures that the Flash weight tensor is used instead and thus gets the correct address.
+ if ps.weight_tensor.ops[0].type == "DMA":
+ copy_compressed_values_to_memory_tensor(sg.flash_tensor, ps.weight_tensor.ops[0].inputs[0])
+ else:
+ copy_compressed_values_to_memory_tensor(sg.flash_tensor, ps.weight_tensor)
+
+ copy_compressed_values_to_memory_tensor(sg.flash_tensor, ps.scale_tensor)
+
+ if ps.ifm_tensor != None and ps.ifm_tensor.mem_area != MemArea.Sram:
+ copy_ifm_values_to_memory_tensor(sg.flash_tensor, ps.ifm_tensor)
+ if ps.ifm2_tensor != None and ps.ifm2_tensor.mem_area != MemArea.Sram:
+ copy_ifm_values_to_memory_tensor(sg.flash_tensor, ps.ifm2_tensor)
sg.command_stream_tensor = make_memory_tensor(
sg.name + "_command_stream", flash_area, command_stream_size_bytes, True, arch