aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/assembly/arm_gemm.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels/assembly/arm_gemm.hpp')
-rw-r--r--src/core/NEON/kernels/assembly/arm_gemm.hpp41
1 files changed, 22 insertions, 19 deletions
diff --git a/src/core/NEON/kernels/assembly/arm_gemm.hpp b/src/core/NEON/kernels/assembly/arm_gemm.hpp
index 58db511547..f6421c12ab 100644
--- a/src/core/NEON/kernels/assembly/arm_gemm.hpp
+++ b/src/core/NEON/kernels/assembly/arm_gemm.hpp
@@ -122,38 +122,41 @@ public:
struct Requantize32
{
public:
- const int32_t *bias = nullptr;
- size_t bias_multi_stride = 0;
- int32_t a_offset = 0;
- int32_t b_offset = 0;
- int32_t c_offset = 0;
- bool per_channel_requant = false;
- int32_t per_layer_shift = 0;
- int32_t per_layer_mul = 0;
- const int32_t *per_channel_shifts = nullptr;
- const int32_t *per_channel_muls = nullptr;
- int32_t minval = 0;
- int32_t maxval = 0;
+ const int32_t *bias = nullptr;
+ size_t bias_multi_stride = 0;
+ int32_t a_offset = 0;
+ int32_t b_offset = 0;
+ int32_t c_offset = 0;
+ bool per_channel_requant = false;
+ int32_t per_layer_left_shift = 0;
+ int32_t per_layer_right_shift = 0;
+ int32_t per_layer_mul = 0;
+ const int32_t *per_channel_left_shifts = nullptr;
+ const int32_t *per_channel_right_shifts = nullptr;
+ const int32_t *per_channel_muls = nullptr;
+ int32_t minval = 0;
+ int32_t maxval = 0;
Requantize32() = default;
// Constructor for per-tensor quantization
Requantize32(const int32_t *bias, size_t bias_multi_stride,
int32_t a_offset, int32_t b_offset, int32_t c_offset,
- int32_t requant_shift, int32_t requant_mul,
- int32_t minv, int32_t maxv)
- : bias(bias), bias_multi_stride(bias_multi_stride), a_offset(a_offset), b_offset(b_offset), c_offset(c_offset), per_channel_requant(false), per_layer_shift(requant_shift), per_layer_mul(requant_mul),
- minval(minv), maxval(maxv)
+ int32_t requant_shift, int32_t requant_mul, int32_t minv, int32_t maxv)
+ : bias(bias), bias_multi_stride(bias_multi_stride), a_offset(a_offset), b_offset(b_offset), c_offset(c_offset), per_channel_requant(false), per_layer_left_shift(std::max(requant_shift, int32_t(0))),
+ per_layer_right_shift(std::min(requant_shift, int32_t(0))), per_layer_mul(requant_mul), minval(minv), maxval(maxv)
{
}
// Constructor for per-channel quantization
Requantize32(const int32_t *bias, size_t bias_multi_stride,
int32_t a_offset, int32_t b_offset, int32_t c_offset,
- const int32_t *requant_shifts, const int32_t *requant_muls,
+ const int32_t *requant_left_shifts,
+ const int32_t *requant_right_shifts,
+ const int32_t *requant_muls,
int32_t minv, int32_t maxv)
- : bias(bias), bias_multi_stride(bias_multi_stride), a_offset(a_offset), b_offset(b_offset), c_offset(c_offset), per_channel_requant(true), per_channel_shifts(requant_shifts),
- per_channel_muls(requant_muls), minval(minv), maxval(maxv)
+ : bias(bias), bias_multi_stride(bias_multi_stride), a_offset(a_offset), b_offset(b_offset), c_offset(c_offset), per_channel_requant(true), per_channel_left_shifts(requant_left_shifts),
+ per_channel_right_shifts(requant_right_shifts), per_channel_muls(requant_muls), minval(minv), maxval(maxv)
{
}
};