diff options
author | Johan Alfvén <johan.alfven@arm.com> | 2022-12-21 10:27:18 +0100 |
---|---|---|
committer | Fredrik Svedberg <fredrik.svedberg@arm.com> | 2022-12-21 14:07:57 +0000 |
commit | da69e6d75061df9eb24b5092436ea6ea81f99330 (patch) | |
tree | a785d75871ce3c568698489f80e462ad135d8d7a /ethosu | |
parent | 1bd20f2d230e2c98cf1fd3710d91df4b65f64329 (diff) | |
download | ethos-u-vela-da69e6d75061df9eb24b5092436ea6ea81f99330.tar.gz |
MLBEDSW-7111: Changed shape calculation for the rolling buffer
- When operators are cascaded, rolling buffers are used
between the producer and the consumer operator.
Depending on attributes such as strides, there were cases
in which the allocated intermediate buffer was too small,
resulting in a buffer overflow. The problem was that
the producer OFM stripe width was greater than the consumer
IFM stripe width.
- Changed the allocation to use the maximum of the producer width
and the consumer width.
Change-Id: I5aa20795eac5591d254b2163deec329cf9325a1b
Signed-off-by: Johan Alfven <johan.alfven@arm.com>
Diffstat (limited to 'ethosu')
-rw-r--r-- | ethosu/vela/cascade_builder.py | 5 |
1 files changed, 4 insertions, 1 deletions
diff --git a/ethosu/vela/cascade_builder.py b/ethosu/vela/cascade_builder.py
index ba210032..3a3026fe 100644
--- a/ethosu/vela/cascade_builder.py
+++ b/ethosu/vela/cascade_builder.py
@@ -76,8 +76,11 @@ class BufferMap:
 def rolling_buffer_shape(producer_stripe: Shape4D, consumer_stripe_input: Shape4D) -> Shape4D:
     """Calculates the storage shape of the rolling buffer between two SchedulerOperations in a Cascade"""
     buffer_height = round_up(producer_stripe.height + consumer_stripe_input.height, consumer_stripe_input.height)
+    # Striding on the consumer op can result in IFM widths that are narrower than the OFM width of the producer.
+    # Therefore, the maximum of the two needs to be used.
+    buffer_width = max(producer_stripe.width, consumer_stripe_input.width)
     # Rolling buffers have to conform to NHCWB16 format
-    return consumer_stripe_input.with_height(buffer_height).with_depth(round_up(producer_stripe.depth, 16))
+    return Shape4D([1, buffer_height, buffer_width, round_up(producer_stripe.depth, 16)])
 
 
 class CascadeBuilder: