Diffstat (limited to 'ethosu/vela/softmax.py')
-rw-r--r--  ethosu/vela/softmax.py  39
1 file changed, 34 insertions(+), 5 deletions(-)
diff --git a/ethosu/vela/softmax.py b/ethosu/vela/softmax.py
index efd91a35..01146eef 100644
--- a/ethosu/vela/softmax.py
+++ b/ethosu/vela/softmax.py
@@ -24,8 +24,10 @@ import numpy as np
 
 from . import fp_math
 from . import scaling
+from .api import NpuRoundingMode
 from .data_type import DataType
 from .debug_database import DebugDatabase
+from .operation import ActivationFunction
 from .operation import Op
 from .operation import Operation
 from .tensor import create_const_tensor
@@ -227,6 +229,12 @@ class SoftMax:
         no_scale_quant = ifm.quantization.clone()
         no_scale_quant.scale_f32 = None
         no_scale_quant.zero_point = 0
+        activation = ActivationFunction(Op.Clip)
+        activation.min = ifm.quantization.quant_min
+        activation.max = ifm.quantization.quant_max
+        activation2 = activation.clone()
+        activation2.min = 2 * ifm.quantization.quant_min
+        activation2.max = 2 * ifm.quantization.quant_max
         one_scale_quant = ifm.quantization.clone()
         one_scale_quant.scale_f32 = 1.0
         one_scale_quant.zero_point = 0
@@ -263,20 +271,23 @@ class SoftMax:
         ifm_exp = Tensor(ifm.shape, DataType.int32, sub_op.name + "_0")
         ifm_exp.quantization = one_scale_quant.clone()
         ifm_exp.quantization.zero_point = 127
-        ifm_exp.quantization.quant_min = -128
-        ifm_exp.quantization.quant_max = 127
+        sub_op.activation = ActivationFunction(Op.LUT)
+        # Note: activation.min/max are non-quantized values
+        sub_op.activation.min = -128 - ifm_exp.quantization.zero_point
+        sub_op.activation.max = 127 - ifm_exp.quantization.zero_point
         sub_op.set_output_tensor(ifm_exp)
         DebugDatabase.add_optimised(self.op, sub_op)
 
         # PASS 2 - SHR
         shr2_op = Operation(Op.SHR, self.op.name + "_shr2")
-        shr2_op.attrs["rounding_mode"] = b"NATURAL"
+        shr2_op.attrs["rounding_mode"] = NpuRoundingMode.NATURAL
         shr2_op.add_input_tensor(ifm_exp)
         shr2_op.add_input_tensor(
             create_const_tensor(
                 shr2_op.name + "_const", [1, 1, 1, 1], DataType.int32, [12], np.int32, quantization=no_scale_quant
             ),
         )
+        shr2_op.activation = activation.clone()
         rescaled_exp = Tensor(ifm.shape, ifm_exp.dtype, shr2_op.name + "_0")
         rescaled_exp.quantization = no_scale_quant
         shr2_op.set_output_tensor(rescaled_exp)
@@ -292,6 +303,7 @@ class SoftMax:
         reduce_sum_op.attrs["strides"] = [1, reduce_sum_op.attrs["stride_h"], reduce_sum_op.attrs["stride_w"], 1]
         reduce_sum_op.attrs["ksize"] = [1, reduce_sum_op.attrs["filter_height"], reduce_sum_op.attrs["filter_width"], 1]
         reduce_sum_op.add_input_tensor(rescaled_exp)
+        reduce_sum_op.activation = activation.clone()
 
         reduce_sum_shape = [1, rescaled_exp.shape[1], rescaled_exp.shape[2], 1]
         sum_of_exp = Tensor(reduce_sum_shape, DataType.int32, reduce_sum_op.name + "_0")
@@ -302,6 +314,7 @@ class SoftMax:
         # PASS 4 - CLZ
         clz_op = Operation(Op.CLZ, self.op.name + "_clz4")
         clz_op.add_input_tensor(sum_of_exp)
+        clz_op.activation = activation.clone()
         headroom_plus_one = Tensor(reduce_sum_shape, DataType.int32, clz_op.name + "_0")
         headroom_plus_one.quantization = no_scale_quant
         clz_op.set_output_tensor(headroom_plus_one)
@@ -320,6 +333,7 @@ class SoftMax:
             ),
         )
         sub5_op.add_input_tensor(headroom_plus_one)
+        sub5_op.activation = activation.clone()
         right_shift = Tensor(reduce_sum_shape, DataType.int32, sub5_op.name + "_0")
         right_shift.quantization = no_scale_quant
         sub5_op.set_output_tensor(right_shift)
@@ -330,6 +344,7 @@ class SoftMax:
         sub6_op = Operation(Op.Sub, self.op.name + "_sub6")
         sub6_op.add_input_tensor(headroom_plus_one)
         sub6_op.add_input_tensor(one)
+        sub6_op.activation = activation.clone()
         headroom = Tensor(reduce_sum_shape, DataType.int32, sub6_op.name + "_0")
         headroom.quantization = no_scale_quant
         sub6_op.set_output_tensor(headroom)
@@ -339,8 +354,10 @@ class SoftMax:
 
         shl7_op = Operation(Op.SHL, self.op.name + "_shl7")
         shl7_op.add_input_tensor(sum_of_exp)
         shl7_op.add_input_tensor(headroom)
+        shl7_op.activation = activation.clone()
         shifted_sum = Tensor(reduce_sum_shape, DataType.int32, shl7_op.name + "_0")
         shifted_sum.quantization = no_scale_quant
+        shl7_op.set_output_tensor(shifted_sum)
         DebugDatabase.add_optimised(self.op, shl7_op)
 
@@ -352,6 +369,7 @@ class SoftMax:
                 "shifted_one_const", [1, 1, 1, 1], DataType.int32, [1 << 30], np.int32, quantization=no_scale_quant
             ),
         )
+        sub8_op.activation = activation.clone()
         shifted_sum_minus_one = Tensor(reduce_sum_shape, DataType.int32, sub8_op.name + "_0")
         shifted_sum_minus_one.quantization = no_scale_quant
         sub8_op.set_output_tensor(shifted_sum_minus_one)
@@ -361,6 +379,7 @@ class SoftMax:
         shl9_op = Operation(Op.SHL, self.op.name + "_shl9")
         shl9_op.add_input_tensor(shifted_sum_minus_one)
         shl9_op.add_input_tensor(one)
+        shl9_op.activation = activation.clone()
         shifted_sum_minus_one = Tensor(reduce_sum_shape, DataType.int32, shl9_op.name + "_0")
         shifted_sum_minus_one.quantization = no_scale_quant
         shl9_op.set_output_tensor(shifted_sum_minus_one)
@@ -374,7 +393,8 @@ class SoftMax:
             ),
         )
         add10_op.add_input_tensor(shifted_sum_minus_one)
-        add10_op.attrs["rescale"] = [1, 1]
+        add10_op.activation = activation.clone()
+        add10_op.attrs["rescale"] = (1, 1)
         half_denominator = Tensor(reduce_sum_shape, DataType.int32, add10_op.name + "_0")
         half_denominator.quantization = one_scale_quant
         add10_op.set_output_tensor(half_denominator)
@@ -396,6 +416,7 @@ class SoftMax:
         rescaled = Tensor(reduce_sum_shape, DataType.int32, mul11_op.name + "_0")
         rescaled.quantization = one_scale_quant.clone()
         rescaled.quantization.scale_f32 = 2.0
+        mul11_op.activation = activation2.clone()
         mul11_op.set_output_tensor(rescaled)
         DebugDatabase.add_optimised(self.op, mul11_op)
 
@@ -407,6 +428,7 @@ class SoftMax:
                 "48_over_17_const", [1, 1, 1, 1], DataType.int32, [1515870810], np.int32, quantization=no_scale_quant
             ),
         )
+        add12_op.activation = activation.clone()
         rescale_w_offset = Tensor(reduce_sum_shape, DataType.int32, add12_op.name + "_0")
         rescale_w_offset.quantization = one_scale_quant
         add12_op.set_output_tensor(rescale_w_offset)
@@ -424,6 +446,7 @@ class SoftMax:
             mul_op = Operation(Op.Mul, self.op.name + "_mul%d" % (13 + i * 5))
             mul_op.add_input_tensor(nr_x)
             mul_op.add_input_tensor(half_denominator)
+            mul_op.activation = activation2.clone()
             half_denominator_times_x = Tensor(reduce_sum_shape, DataType.int32, mul_op.name + "_0")
             half_denominator_times_x.quantization = one_scale_quant.clone()
             half_denominator_times_x.quantization.scale_f32 = 2.0
@@ -433,6 +456,7 @@ class SoftMax:
             sub_op = Operation(Op.Sub, self.op.name + "_sub%d" % (14 + i * 5))
             sub_op.add_input_tensor(F2_one)
             sub_op.add_input_tensor(half_denominator_times_x)
+            sub_op.activation = activation.clone()
             one_minus_half_denominator_times_x = Tensor(reduce_sum_shape, DataType.int32, sub_op.name + "_0")
             one_minus_half_denominator_times_x.quantization = one_scale_quant
             sub_op.set_output_tensor(one_minus_half_denominator_times_x)
@@ -441,6 +465,7 @@ class SoftMax:
             mul_op = Operation(Op.Mul, self.op.name + "_mul%d" % (15 + i * 5))
             mul_op.add_input_tensor(nr_x)
             mul_op.add_input_tensor(one_minus_half_denominator_times_x)
+            mul_op.activation = activation2.clone()
             to_rescale = Tensor(reduce_sum_shape, DataType.int32, mul_op.name + "_0")
             to_rescale.quantization = one_scale_quant.clone()
             to_rescale.quantization.scale_f32 = 2.0
@@ -450,6 +475,7 @@ class SoftMax:
             shl_op = Operation(Op.Mul, self.op.name + "_mul%d" % (16 + i * 5))
             shl_op.add_input_tensor(to_rescale)
             shl_op.add_input_tensor(four)
+            shl_op.activation = activation.clone()
             to_add = Tensor(reduce_sum_shape, DataType.int32, shl_op.name + "_0")
             to_add.quantization = no_scale_quant
             shl_op.set_output_tensor(to_add)
@@ -458,6 +484,7 @@ class SoftMax:
             add_op = Operation(Op.Add, self.op.name + "_add%d" % (17 + i * 5))
             add_op.add_input_tensor(nr_x)
             add_op.add_input_tensor(to_add)
+            add_op.activation = activation.clone()
             nr_x = Tensor(reduce_sum_shape, DataType.int32, add_op.name + "_0")
             nr_x.quantization = one_scale_quant
             add_op.set_output_tensor(nr_x)
@@ -469,6 +496,7 @@ class SoftMax:
         mul28_op.add_input_tensor(
             create_const_tensor("two_const", [1, 1, 1, 1], DataType.int32, [2], np.int32, quantization=no_scale_quant)
         )
+        mul28_op.activation = activation.clone()
         scale_factor = Tensor(reduce_sum_shape, DataType.int32, mul28_op.name + "_0")
         scale_factor.quantization = one_scale_quant
         mul28_op.set_output_tensor(scale_factor)
@@ -478,6 +506,7 @@ class SoftMax:
         mul_op = Operation(Op.Mul, self.op.name + "_mul29")
         mul_op.add_input_tensor(ifm_exp)
         mul_op.add_input_tensor(scale_factor)
+        mul_op.activation = activation2.clone()
         scaled_exp = Tensor(ifm_exp.shape, DataType.int32, mul_op.name + "_0")
         scaled_exp.quantization = one_scale_quant.clone()
         scaled_exp.quantization.scale_f32 = 2.0
@@ -486,7 +515,7 @@ class SoftMax:
 
         # PASS 30 - SHR
         shr30_op = Operation(Op.SHR, self.op.name + "_shr30")
-        shr30_op.attrs["rounding_mode"] = b"NATURAL"
+        shr30_op.attrs["rounding_mode"] = NpuRoundingMode.NATURAL
         shr30_op.add_input_tensor(scaled_exp)
         shr30_op.add_input_tensor(right_shift)
         shr30_op.set_output_tensor(ofm)
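
The change is mechanical: the commit derives three clamp ranges from the IFM quantization and clones one of them onto every intermediate op, so each pass saturates rather than wraps. A minimal, self-contained sketch of that range arithmetic follows; ActivationFunction and Op here are plain stand-ins for the Vela classes of the same names, not the real implementation, and the constants follow the int8 hunks above.

    from dataclasses import dataclass, replace
    from enum import Enum
    from typing import Optional


    class Op(Enum):
        Clip = "Clip"
        LUT = "LUT"


    @dataclass
    class ActivationFunction:
        op_type: Op
        min: Optional[int] = None
        max: Optional[int] = None

        def clone(self) -> "ActivationFunction":
            # Field-by-field copy, as the diff relies on via activation.clone()
            return replace(self)


    # int8 softmax: representable range [-128, 127]; PASS 1 gives the exp
    # output (ifm_exp) a zero point of 127
    quant_min, quant_max, zero_point = -128, 127, 127

    # Clamp for ops whose output keeps the 1.0 scale
    activation = ActivationFunction(Op.Clip, min=quant_min, max=quant_max)

    # Doubled clamp for the Mul ops whose output gets scale_f32 = 2.0
    activation2 = ActivationFunction(Op.Clip, min=2 * quant_min, max=2 * quant_max)

    # LUT bounds are non-quantized values, i.e. relative to the zero point
    # (see the PASS 1 hunk)
    lut = ActivationFunction(Op.LUT, min=-128 - zero_point, max=127 - zero_point)

    assert (lut.min, lut.max) == (-255, 0)

The ops given activation2 are exactly those whose output quantization is set to scale_f32 = 2.0 in the hunks above, which is why their clamp range is doubled to match.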