diff options
Diffstat (limited to 'ethosu/vela/architecture_features.py')
-rw-r--r-- | ethosu/vela/architecture_features.py | 138 |
1 files changed, 1 insertions, 137 deletions
diff --git a/ethosu/vela/architecture_features.py b/ethosu/vela/architecture_features.py index f7dcc8ce..354ab12c 100644 --- a/ethosu/vela/architecture_features.py +++ b/ethosu/vela/architecture_features.py @@ -192,6 +192,7 @@ class ArchitectureFeatures: SubKernelMax = Block(8, 8, 65536) DEFAULT_CONFIG = "internal-default" + MAX_BLOCKDEP = 3 def __init__( self, @@ -442,143 +443,6 @@ class ArchitectureFeatures: return Block(ifm_block_width, ifm_block_height, ifm_block_depth) - @staticmethod - def intersects(start_a, end_a, start_b, end_b): - start_x = max(start_a[0], start_b[0]) - end_x = min(end_a[0], end_b[0]) - start_y = max(start_a[1], start_b[1]) - end_y = min(end_a[1], end_b[1]) - start_z = max(start_a[2], start_b[2]) - end_z = min(end_a[2], end_b[2]) - return ((end_x - start_x) > 0) and ((end_y - start_y) > 0) and ((end_z - start_z) > 0) - - # Block job dependency: - # Does the VOLUME of IFMs for block job B(0) overlap with VOLUME of OFMs block jobs A(8,9,10) - # - # A | B - # ----------------------+------------------ - # .... 3,4,5,6,7,8,9,10 | 0,1,2,3,4,5,6,8 10 < JOB NUMBER - # |<------->| dependency offset - # - MAX_BLOCKDEP = 3 - - # Get the coordinates of a block offset from either the end (negative) - # or the start (zero or positive) of the given 3d area - def get_offset_block_coords(self, area: Rect, block: Block, offset): - size = area.size() - # Dimensions of the region, in blocks - width_blocks = round_up_divide(size.width, block.width) - height_blocks = round_up_divide(size.height, block.height) - depth_blocks = round_up_divide(size.depth, block.depth) - total_blocks = width_blocks * height_blocks * depth_blocks - if offset < 0: - index = total_blocks + offset - else: - index = offset - - if index >= total_blocks: - return None - - # Coordinates of the indexed block - coord_z = block.depth * (index % depth_blocks) - coord_y = block.height * (index // (depth_blocks * width_blocks)) - coord_x = block.width * ((index // depth_blocks) % width_blocks) - - return (coord_x + area.x, coord_y + area.y, coord_z + area.z) - - def get_first_job_input_volume( - self, ifm: Rect, ofm: Rect, ifm_block_depth, ofm_block: Block, kernel: Kernel, padLT, block_offset - ): - # Get ifm block size (jobs are invisibly decomposed into subkernels) - ifm_block = self.get_ifm_block_size(ifm_block_depth, ofm_block, kernel, self.ofm_block_max) - ifm_depth_blocks = round_up_divide(ifm.size().depth, ifm_block_depth) - - # Which OFM block are we calculating - ofm_coord = self.get_offset_block_coords(ofm, ofm_block, block_offset // ifm_depth_blocks) - if ofm_coord is None: - return None - - # Coordinate of the source IFM block - ifm_coord_x = max(0, ofm_coord[0] * kernel.stride.x - padLT[0]) - ifm_coord_y = max(0, ofm_coord[1] * kernel.stride.y - padLT[1]) - ifm_coord_z = ifm.z + (block_offset % ifm_depth_blocks) * ifm_block.depth - - # IFM block that will be sampled for the FIRST+block_offset job in the next operator's OFM - start_coord = (ifm_coord_x, ifm_coord_y, ifm_coord_z) - end_coord = ( - start_coord[0] + ifm_block.width, - start_coord[1] + ifm_block.height, - start_coord[2] + ifm_block.depth, - ) - return (start_coord, end_coord, 1) # start, end, total jobs - - def get_prev_job_output_volume(self, ofm: Rect, ofm_block: Block, block_offset): - assert block_offset >= 0 - - # Get OFM block's volume coordinates - start_coord = self.get_offset_block_coords(ofm, ofm_block, -1 - block_offset) - if start_coord is None: - return None - end_coord = ( - start_coord[0] + ofm_block.width, - start_coord[1] + ofm_block.height, - start_coord[2] + ofm_block.depth, - ) - return (start_coord, end_coord, 1) # start, end, total jobs for this OFM block - - def calc_block_dep( - self, - prev_ofm: Rect, - prev_ofm_block: Block, - ifm: Rect, - ofm: Rect, - ifm_block_depth, - ofm_block: Block, - kernel: Kernel, - padLT, - intersects, - ): - blockdep = ArchitectureFeatures.MAX_BLOCKDEP - - # Iterate over the next BLOCKDEP inputs, checking to see if a sliding window - # of IFM area overlaps with any previous OFM block generation. - elapsed_jobs = 0 - for forward_offset in range(ArchitectureFeatures.MAX_BLOCKDEP): - # This is the IFM block we want to sample from - in_area = self.get_first_job_input_volume( - ifm, ofm, ifm_block_depth, ofm_block, kernel, padLT, forward_offset - ) - if in_area is None: - break - - # Try several previous-OFM blocks in the past (they still might comprise multiple IFM jobs) - outstanding_jobs = 0 - for block_offset in range(ArchitectureFeatures.MAX_BLOCKDEP): - # This is the OFM block being generated by the previous op - out_area = self.get_prev_job_output_volume(prev_ofm, prev_ofm_block, block_offset) - if out_area is None: - break - - # Block dependency is the max number of allowed outstanding jobs - # in the pipeline. Selected by determining how many jobs occur - # in between two operators' overlapping OFM->IFM block volumes - if intersects(in_area[0], in_area[1], out_area[0], out_area[1]): - break - # Early exit if no intersections and we've seen enough jobs in the pipeline - elif outstanding_jobs > ArchitectureFeatures.MAX_BLOCKDEP: - break - - # This OFM had this many jobs (accumulate over multiple OFM blocks) - outstanding_jobs += out_area[2] - - blockdep = min(blockdep, elapsed_jobs + outstanding_jobs) - elapsed_jobs += in_area[2] - # Early exit if no intersections and we've seen enough jobs in the pipeline - if elapsed_jobs > ArchitectureFeatures.MAX_BLOCKDEP: - break - - return blockdep - def is_spilling_enabled(self): """ Spilling is a feature that allows the Ethos-U to use a dedicated SRAM as a cache for various types of data |