diff options
-rw-r--r-- | chapters/reduction.adoc | 143 | ||||
-rw-r--r-- | tosa.xml | 11 |
2 files changed, 85 insertions, 69 deletions
diff --git a/chapters/reduction.adoc b/chapters/reduction.adoc index 8a3ceac..19ff4ed 100644 --- a/chapters/reduction.adoc +++ b/chapters/reduction.adoc @@ -19,18 +19,19 @@ include::{generated}/operators/REDUCE_ALL.adoc[] ---- ERROR_IF(axis < 0 || axis >= rank(shape1)); ERROR_IF(shape[axis] != 1); - -// Initialize output state to true -for_each(index in shape) { - tensor_write<in_out_t>(output, shape, index, true); -} -for_each(index in shape1) { - dim_t out_index = index; - out_index[axis] = 0; - in_out_t value = tensor_read<in_out_t>(input, shape1, index); - in_out_t state = tensor_read<in_out_t>(output, shape, out_index); - state = state && value; - tensor_write<in_out_t>(output, shape, out_index, state); +left_shape = (axis > 0) ? shape[0:axis-1] : []; +right_shape = (axis < rank(shape)-1) ? shape[axis+1:rank(shape)-1] : []; +for_each(left_index in left_shape) { + for_each(right_index in right_shape) { + in_out_t acc = true; + for (i = 0; i < shape1[axis]; i++) { + index = flatten(left_index, [i], right_index); + in_out_t value = tensor_read<in_out_t>(input, shape1, index); + acc = acc && value; + } + out_index = flatten(left_index, [0], right_index); + tensor_write<in_out_t>(output, shape, out_index, acc); + } } ---- @@ -44,18 +45,19 @@ include::{generated}/operators/REDUCE_ANY.adoc[] ---- ERROR_IF(axis < 0 || axis >= rank(shape1)); ERROR_IF(shape[axis] != 1); - -// Initialize output state to false -for_each(index in shape) { - tensor_write<in_out_t>(output, shape, index, false); -} -for_each(index in shape1) { - dim_t out_index = index; - out_index[axis] = 0; - in_out_t value = tensor_read<in_out_t>(input, shape1, index); - in_out_t state = tensor_read<in_out_t>(output, shape, out_index); - state = state || value; - tensor_write<in_out_t>(output, shape, out_index, state); +left_shape = (axis > 0) ? shape[0:axis-1] : []; +right_shape = (axis < rank(shape)-1) ? 
shape[axis+1:rank(shape)-1] : []; +for_each(left_index in left_shape) { + for_each(right_index in right_shape) { + in_out_t acc = false; + for (i = 0; i < shape1[axis]; i++) { + index = flatten(left_index, [i], right_index); + in_out_t value = tensor_read<in_out_t>(input, shape1, index); + acc = acc || value; + } + out_index = flatten(left_index, [0], right_index); + tensor_write<in_out_t>(output, shape, out_index, acc); + } } ---- @@ -69,16 +71,19 @@ include::{generated}/operators/REDUCE_MAX.adoc[] ---- ERROR_IF(axis < 0 || axis >= rank(shape1)); ERROR_IF(shape[axis] != 1); -for_each(index in shape) { - tensor_write<in_out_t>(output, shape, index, minimum<in_out_t>); -} -for_each(index in shape1) { - dim_t out_index = index; - out_index[axis] = 0; - in_out_t value = tensor_read<in_out_t>(input, shape1, index); - in_out_t state = tensor_read<in_out_t>(output, shape, out_index); - state = apply_max_s<in_out_t>(state, value); - tensor_write<in_out_t>(output, shape, out_index, state); +left_shape = (axis > 0) ? shape[0:axis-1] : []; +right_shape = (axis < rank(shape)-1) ? 
shape[axis+1:rank(shape)-1] : []; +for_each(left_index in left_shape) { + for_each(right_index in right_shape) { + in_out_t acc = minimum<in_out_t>; + for (i = 0; i < shape1[axis]; i++) { + index = flatten(left_index, [i], right_index); + in_out_t value = tensor_read<in_out_t>(input, shape1, index); + acc = apply_max_s<in_out_t>(acc, value); + } + out_index = flatten(left_index, [0], right_index); + tensor_write<in_out_t>(output, shape, out_index, acc); + } } ---- @@ -92,16 +97,19 @@ include::{generated}/operators/REDUCE_MIN.adoc[] ---- ERROR_IF(axis < 0 || axis >= rank(shape1)); ERROR_IF(shape[axis] != 1); -for_each(index in shape) { - tensor_write<in_out_t>(output, shape, index, maximum<in_out_t>); -} -for_each(index in shape1) { - dim_t out_index = index; - out_index[axis] = 0; - in_out_t value = tensor_read<in_out_t>(input, shape1, index); - in_out_t state = tensor_read<in_out_t>(output, shape, out_index); - state = apply_min_s<in_out_t>(state, value); - tensor_write<in_out_t>(output, shape, out_index, state); +left_shape = (axis > 0) ? shape[0:axis-1] : []; +right_shape = (axis < rank(shape)-1) ? 
shape[axis+1:rank(shape)-1] : []; +for_each(left_index in left_shape) { + for_each(right_index in right_shape) { + in_out_t acc = maximum<in_out_t>; + for (i = 0; i < shape1[axis]; i++) { + index = flatten(left_index, [i], right_index); + in_out_t value = tensor_read<in_out_t>(input, shape1, index); + acc = apply_min_s<in_out_t>(acc, value); + } + out_index = flatten(left_index, [0], right_index); + tensor_write<in_out_t>(output, shape, out_index, acc); + } } ---- @@ -115,16 +123,19 @@ include::{generated}/operators/REDUCE_PRODUCT.adoc[] ---- ERROR_IF(axis < 0 || axis >= rank(shape1)); ERROR_IF(shape[axis] != 1); -for_each(index in shape) { - tensor_write<in_out_t>(output, shape, index, 1.0); -} -for_each(index in shape1) { - dim_t out_index = index; - out_index[axis] = 0; - in_out_t value = tensor_read<in_out_t>(input, shape1, index); - in_out_t state = tensor_read<in_out_t>(output, shape, out_index); - state = apply_mul_s<in_out_t>(state, value); - tensor_write<in_out_t>(output, shape, out_index, state); +left_shape = (axis > 0) ? shape[0:axis-1] : []; +right_shape = (axis < rank(shape)-1) ? 
shape[axis+1:rank(shape)-1] : []; +for_each(left_index in left_shape) { + for_each(right_index in right_shape) { + in_out_t acc = 1.0; + for (i = 0; i < shape1[axis]; i++) { + index = flatten(left_index, [i], right_index); + in_out_t value = tensor_read<in_out_t>(input, shape1, index); + acc = apply_mul_s<in_out_t>(acc, value); + } + out_index = flatten(left_index, [0], right_index); + tensor_write<in_out_t>(output, shape, out_index, acc); + } } ---- @@ -138,15 +149,19 @@ include::{generated}/operators/REDUCE_SUM.adoc[] ---- ERROR_IF(axis < 0 || axis >= rank(shape1)); ERROR_IF(shape[axis] != 1); -for_each(index in shape) { - tensor_write<in_out_t>(output, shape, index, 0); -} -for_each(index in shape1) { - dim_t out_index = index; - out_index[axis] = 0; - in_out_t value = tensor_read<in_out_t>(input, shape1, index); - in_out_t state = tensor_read<in_out_t>(output, shape, out_index); - state = apply_add_s<in_out_t>(state, value); - tensor_write<in_out_t>(output, shape, out_index, state); +left_shape = (axis > 0) ? shape[0:axis-1] : []; +right_shape = (axis < rank(shape)-1) ? 
shape[axis+1:rank(shape)-1] : []; +for_each(left_index in left_shape) { + for_each(right_index in right_shape) { + acc_t acc = 0; + for (i = 0; i < shape1[axis]; i++) { + index = flatten(left_index, [i], right_index); + acc_t value = tensor_read<in_out_t>(input, shape1, index); + acc = apply_add_s<acc_t>(acc, value); + } + out_index = flatten(left_index, [0], right_index); + in_out_t result = static_cast<in_out_t>(acc); + tensor_write<in_out_t>(output, shape, out_index, result); + } } ---- @@ -1880,7 +1880,7 @@ <name>REDUCE_SUM</name> <arguments> <argument category="input" name="input" type="tensor_t" shape="shape1" tensor-element-type="in_out_t"> - <description>Input tensor with rank from 1 to 4</description> + <description>Input tensor</description> <rank min="1" max="MAX_RANK"/> </argument> <argument category="attribute" name="axis" type="tensor_t" shape="-" tensor-element-type="i32_t"> @@ -1894,17 +1894,18 @@ </arguments> <types> <type name='in_out_t'/> + <type name='acc_t'/> </types> - <typesupport mode="signed 32" in_out_t="i32_t"/> - <typesupport mode="fp16" in_out_t="fp16_t"> + <typesupport mode="signed 32" in_out_t="i32_t" acc_t="i32_t"/> + <typesupport mode="fp16" in_out_t="fp16_t" acc_t="fp16_t"> <profile name="MI"/> <profile name="MT"/> </typesupport> - <typesupport mode="bf16" in_out_t="bf16_t"> + <typesupport mode="bf16" in_out_t="bf16_t" acc_t="fp32_t"> <profile name="MI"/> <profile name="MT"/> </typesupport> - <typesupport mode="fp32" in_out_t="fp32_t"> + <typesupport mode="fp32" in_out_t="fp32_t" acc_t="fp32_t"> <profile name="MI"/> <profile name="MT"/> </typesupport> |