about | summary | refs | log | tree | commit | diff
path: root/ethosu/vela/architecture_features.py
diff options
context:
space:
mode:
author: Louis Verhaard <louis.verhaard@arm.com> 2020-11-26 11:42:04 +0100
committer: patrik.gustavsson <patrik.gustavsson@arm.com> 2020-12-07 14:51:52 +0000
commit: 1e17018d1aabff6b2a4cc5e8e3758678347b84c5 (patch)
tree: 8c06cb5a9f68e45fce96d9f17aac9a86f28ad912 /ethosu/vela/architecture_features.py
parent: 32c7f5bbbccae480a0bb0c0e5b74a37dd9412023 (diff)
download: ethos-u-vela-1e17018d1aabff6b2a4cc5e8e3758678347b84c5.tar.gz
MLBEDSW-3643: Refactor blockdep calculation
Moved blockdep calculation and other helper functions for code generation to a separate file.

Change-Id: I2f8ccea478654272ebf42217fc5c1800e9ad177a
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
Diffstat (limited to 'ethosu/vela/architecture_features.py')
-rw-r--r-- ethosu/vela/architecture_features.py 138
1 files changed, 1 insertions, 137 deletions
diff --git a/ethosu/vela/architecture_features.py b/ethosu/vela/architecture_features.py
index f7dcc8ce..354ab12c 100644
--- a/ethosu/vela/architecture_features.py
+++ b/ethosu/vela/architecture_features.py
@@ -192,6 +192,7 @@ class ArchitectureFeatures:
SubKernelMax = Block(8, 8, 65536)
DEFAULT_CONFIG = "internal-default"
+ MAX_BLOCKDEP = 3
def __init__(
self,
@@ -442,143 +443,6 @@ class ArchitectureFeatures:
return Block(ifm_block_width, ifm_block_height, ifm_block_depth)
- @staticmethod
- def intersects(start_a, end_a, start_b, end_b):
- start_x = max(start_a[0], start_b[0])
- end_x = min(end_a[0], end_b[0])
- start_y = max(start_a[1], start_b[1])
- end_y = min(end_a[1], end_b[1])
- start_z = max(start_a[2], start_b[2])
- end_z = min(end_a[2], end_b[2])
- return ((end_x - start_x) > 0) and ((end_y - start_y) > 0) and ((end_z - start_z) > 0)
-
- # Block job dependency:
- # Does the VOLUME of IFMs for block job B(0) overlap with VOLUME of OFMs block jobs A(8,9,10)
- #
- # A | B
- # ----------------------+------------------
- # .... 3,4,5,6,7,8,9,10 | 0,1,2,3,4,5,6,8 10 < JOB NUMBER
- # |<------->| dependency offset
- #
- MAX_BLOCKDEP = 3
-
- # Get the coordinates of a block offset from either the end (negative)
- # or the start (zero or positive) of the given 3d area
- def get_offset_block_coords(self, area: Rect, block: Block, offset):
- size = area.size()
- # Dimensions of the region, in blocks
- width_blocks = round_up_divide(size.width, block.width)
- height_blocks = round_up_divide(size.height, block.height)
- depth_blocks = round_up_divide(size.depth, block.depth)
- total_blocks = width_blocks * height_blocks * depth_blocks
- if offset < 0:
- index = total_blocks + offset
- else:
- index = offset
-
- if index >= total_blocks:
- return None
-
- # Coordinates of the indexed block
- coord_z = block.depth * (index % depth_blocks)
- coord_y = block.height * (index // (depth_blocks * width_blocks))
- coord_x = block.width * ((index // depth_blocks) % width_blocks)
-
- return (coord_x + area.x, coord_y + area.y, coord_z + area.z)
-
- def get_first_job_input_volume(
- self, ifm: Rect, ofm: Rect, ifm_block_depth, ofm_block: Block, kernel: Kernel, padLT, block_offset
- ):
- # Get ifm block size (jobs are invisibly decomposed into subkernels)
- ifm_block = self.get_ifm_block_size(ifm_block_depth, ofm_block, kernel, self.ofm_block_max)
- ifm_depth_blocks = round_up_divide(ifm.size().depth, ifm_block_depth)
-
- # Which OFM block are we calculating
- ofm_coord = self.get_offset_block_coords(ofm, ofm_block, block_offset // ifm_depth_blocks)
- if ofm_coord is None:
- return None
-
- # Coordinate of the source IFM block
- ifm_coord_x = max(0, ofm_coord[0] * kernel.stride.x - padLT[0])
- ifm_coord_y = max(0, ofm_coord[1] * kernel.stride.y - padLT[1])
- ifm_coord_z = ifm.z + (block_offset % ifm_depth_blocks) * ifm_block.depth
-
- # IFM block that will be sampled for the FIRST+block_offset job in the next operator's OFM
- start_coord = (ifm_coord_x, ifm_coord_y, ifm_coord_z)
- end_coord = (
- start_coord[0] + ifm_block.width,
- start_coord[1] + ifm_block.height,
- start_coord[2] + ifm_block.depth,
- )
- return (start_coord, end_coord, 1) # start, end, total jobs
-
- def get_prev_job_output_volume(self, ofm: Rect, ofm_block: Block, block_offset):
- assert block_offset >= 0
-
- # Get OFM block's volume coordinates
- start_coord = self.get_offset_block_coords(ofm, ofm_block, -1 - block_offset)
- if start_coord is None:
- return None
- end_coord = (
- start_coord[0] + ofm_block.width,
- start_coord[1] + ofm_block.height,
- start_coord[2] + ofm_block.depth,
- )
- return (start_coord, end_coord, 1) # start, end, total jobs for this OFM block
-
- def calc_block_dep(
- self,
- prev_ofm: Rect,
- prev_ofm_block: Block,
- ifm: Rect,
- ofm: Rect,
- ifm_block_depth,
- ofm_block: Block,
- kernel: Kernel,
- padLT,
- intersects,
- ):
- blockdep = ArchitectureFeatures.MAX_BLOCKDEP
-
- # Iterate over the next BLOCKDEP inputs, checking to see if a sliding window
- # of IFM area overlaps with any previous OFM block generation.
- elapsed_jobs = 0
- for forward_offset in range(ArchitectureFeatures.MAX_BLOCKDEP):
- # This is the IFM block we want to sample from
- in_area = self.get_first_job_input_volume(
- ifm, ofm, ifm_block_depth, ofm_block, kernel, padLT, forward_offset
- )
- if in_area is None:
- break
-
- # Try several previous-OFM blocks in the past (they still might comprise multiple IFM jobs)
- outstanding_jobs = 0
- for block_offset in range(ArchitectureFeatures.MAX_BLOCKDEP):
- # This is the OFM block being generated by the previous op
- out_area = self.get_prev_job_output_volume(prev_ofm, prev_ofm_block, block_offset)
- if out_area is None:
- break
-
- # Block dependency is the max number of allowed outstanding jobs
- # in the pipeline. Selected by determining how many jobs occur
- # in between two operators' overlapping OFM->IFM block volumes
- if intersects(in_area[0], in_area[1], out_area[0], out_area[1]):
- break
- # Early exit if no intersections and we've seen enough jobs in the pipeline
- elif outstanding_jobs > ArchitectureFeatures.MAX_BLOCKDEP:
- break
-
- # This OFM had this many jobs (accumulate over multiple OFM blocks)
- outstanding_jobs += out_area[2]
-
- blockdep = min(blockdep, elapsed_jobs + outstanding_jobs)
- elapsed_jobs += in_area[2]
- # Early exit if no intersections and we've seen enough jobs in the pipeline
- if elapsed_jobs > ArchitectureFeatures.MAX_BLOCKDEP:
- break
-
- return blockdep
-
def is_spilling_enabled(self):
"""
Spilling is a feature that allows the Ethos-U to use a dedicated SRAM as a cache for various types of data