aboutsummaryrefslogtreecommitdiff
path: root/src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp')
-rw-r--r-- src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp | 211
1 files changed, 163 insertions, 48 deletions
diff --git a/src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp b/src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp
index 1a4dd7aac3..56337cfdf4 100644
--- a/src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp
+++ b/src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp
@@ -11,12 +11,13 @@ inline void CreateRescaleTosaOperator(const std::string& inputName,
const std::string& outputName,
DType output_type,
const std::vector<int32_t>& shape,
- int32_t scale_multiplier,
- int32_t scale_shift,
+ const std::vector<int32_t>& multipliers,
+ const std::vector<int32_t>& shifts,
int32_t input_zp,
int32_t output_zp,
bool double_round,
bool scale32,
+ bool per_channel,
TosaSerializationOperator** op,
TosaSerializationTensor** tensor)
{
@@ -25,15 +26,13 @@ inline void CreateRescaleTosaOperator(const std::string& inputName,
throw armnn::Exception("CreateRescaleTosaOperator: nullptr op");
}
- std::vector<int32_t> multipliers{scale_multiplier};
- std::vector<int32_t> shifts{scale_shift};
TosaRescaleAttribute attribute(input_zp,
output_zp,
multipliers,
shifts,
scale32,
double_round,
- false, // per_channel
+ per_channel,
false, // input_unsigned
false); // output_unsigned
@@ -58,75 +57,191 @@ inline void CreateRescaleTosaOperator(const std::string& inputName,
const std::string& outputName,
DType output_type,
const std::vector<int32_t>& shape,
- double scale,
+ int32_t scale_multiplier,
+ int32_t scale_shift,
int32_t input_zp,
int32_t output_zp,
bool double_round,
bool scale32,
+ bool per_channel,
TosaSerializationOperator** op,
TosaSerializationTensor** tensor)
{
- // The code that follows is based on the behaviour specified in
- // https://www.mlplatform.org/tosa/tosa_spec.html#_precision_scaling
+ const std::vector<int32_t> multipliers{scale_multiplier};
+ const std::vector<int32_t> shifts{scale_shift};
+ CreateRescaleTosaOperator(inputName, outputName, output_type, shape, multipliers, shifts,
+ input_zp, output_zp, double_round, scale32, per_channel, op, tensor);
+}
+
+/// The following is taken from mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp in the LLVM project
+/// From a scale value, generates multiplier and shift values such that
+/// scale ~= multiplier * 2^(-shift), where multiplier = round(mantissa * 2^31)
+/// and mantissa is in (-1.0, -0.5] or [0.5, 1.0) for 32-bit scaling.
+static void ComputeMultiplierAndShiftTosaScale32(double scale,
+ int32_t &multiplier,
+ int32_t &shift)
+{
+ const double mantissa = std::frexp(scale, &shift);
+ auto shiftedM = std::round(mantissa * (int64_t(1) << 31));
+
+ // Can't be greater than 1.0.
+ if (!(shiftedM <= (int64_t(1) << 31)))
+ {
+ throw armnn::Exception("Shifted mantissa exceeds 32 signed bits");
+ }
- auto GetScaleParams = [](double scale, double& m, int32_t& n)
+ if (shiftedM == (int64_t(1) << 31))
{
- m = 0;
- n = 0;
+ shiftedM /= 2;
+ shift++;
+ }
- double lastErr = 1e06;
+ // TOSA expects right shift to be positive, and embed (1 << 31) into right
+ // shift bits.
+ shift = (-shift) + 31;
- const int32_t numExponents = 62;
- const double start = 1.0;
- const double end = 2.0;
+ if (!(shiftedM <= std::numeric_limits<int32_t>::max()))
+ {
+ throw armnn::Exception("Shifted mantissa exceeds 32-bit signed output type");
+ }
- // Slow iterative approach but running in Reference only
- for (int32_t i = 0; i < numExponents; ++i)
- {
- double exp = 1.0 / (1 << i);
- double currentM = scale / exp; // Find current m given value = currentM * exp
- if ((currentM >= start) && (currentM < end))
- {
- double value = currentM * exp;
- double err = std::abs(scale - value);
- if (err < lastErr)
- {
- // Take the m, n that minimize the error
- n = i;
- m = currentM;
- lastErr = err;
- }
- }
- }
- };
+ multiplier = static_cast<int32_t>(shiftedM);
- auto GetMultiplierShiftByScale = [GetScaleParams](bool scale32, double scale, int32_t& multiplier, int32_t& shift)
+ // Shifting tops out at 62 bits. Right shift to make 62 bits the max.
+ // The limit of 62 on shift allows the shift to be decomposed as
+ // two right shifts of 31.
+ if (shift > 62)
{
- double m = 0;
- int32_t n = 0;
+ // Shifting the multiplier by more than 31-bits is unnecessary.
+ multiplier = multiplier >> std::min<int32_t>(31, shift - 62);
+ shift = 62;
+ }
+}
- GetScaleParams(scale, m, n);
+/// The following is taken from mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp in the LLVM project
+/// From a scale value, generates multiplier and shift values such that
+/// scale ~= multiplier * 2^(-shift), where multiplier = round(mantissa * 2^15)
+/// and mantissa is in (-1.0, -0.5] or [0.5, 1.0) for 16-bit scaling.
+static void ComputeMultiplierAndShiftTosaScale16(double scale,
+ int32_t &multiplier,
+ int32_t &shift)
+{
+ const double mantissa = std::frexp(scale, &shift);
+ auto shiftedM = std::round(mantissa * (int64_t(1) << 15));
- multiplier = (scale32) ? (1 << 30) * static_cast<int32_t>(m) : (1 << 14) * static_cast<int32_t>(m);
- shift = (scale32) ? (30 + n) : (14 + n);
- };
+ // Can't be greater than 1.0.
+ if (!(shiftedM <= (int64_t(1) << 15)))
+ {
+ throw armnn::Exception("Shifted mantissa exceeds 16 signed bits");
+ }
+ if (shiftedM == (int64_t(1) << 15))
+ {
+ shiftedM /= 2;
+ shift++;
+ }
+
+ // TOSA expects right shift to be positive and embed (1 << 15) into right
+ // shift bits.
+ shift = (-shift) + 15;
+
+ if (!(shiftedM <= std::numeric_limits<int32_t>::max()))
+ {
+ throw armnn::Exception("Shifted mantissa exceeds 32-bit signed output type");
+ }
+
+ multiplier = static_cast<int32_t>(shiftedM);
+
+ // Shifting tops out at 62 bits. Right shift to make 62 bits the max.
+ // The limit of 62 on shift allows the shift to be decomposed as
+ // two right shifts of 31.
+ if (shift > 62)
+ {
+ // Shifting the multiplier by more than 31-bits is unnecessary.
+ multiplier = multiplier >> std::min<int32_t>(31, shift - 62);
+ shift = 62;
+ }
+}
+
+inline void CreateRescaleTosaOperator(const std::string& inputName,
+ const std::string& outputName,
+ DType output_type,
+ const std::vector<int32_t>& shape,
+ double scale,
+ int32_t input_zp,
+ int32_t output_zp,
+ bool double_round,
+ bool scale32,
+ TosaSerializationOperator** op,
+ TosaSerializationTensor** tensor)
+{
int32_t multiplier;
int32_t shift;
- GetMultiplierShiftByScale(scale32, scale, multiplier, shift);
+
+ if (scale32)
+ {
+ ComputeMultiplierAndShiftTosaScale32(scale, multiplier, shift);
+ }
+ else
+ {
+ ComputeMultiplierAndShiftTosaScale16(scale, multiplier, shift);
+ }
+
CreateRescaleTosaOperator(inputName, outputName, output_type, shape, multiplier, shift,
- input_zp, output_zp, double_round, scale32, op, tensor);
+ input_zp, output_zp, double_round, scale32, false, op, tensor);
}
-inline void CreateFromInt32RescaleTosaOperator(const std::string& inputName,
- const std::string& outputName,
+inline void CreateRescaleTosaOperatorPerChannel(const std::string& inputName,
+ const std::string& outputName,
DType output_type,
const std::vector<int32_t>& shape,
- double output_scale,
+ int32_t input_zp,
int32_t output_zp,
+ bool double_round,
+ bool scale32,
+ double input_scale,
+ double output_scale,
+ const std::vector<float>& weight_scales,
TosaSerializationOperator** op,
TosaSerializationTensor** tensor)
{
- CreateRescaleTosaOperator(inputName, outputName, output_type, shape,
- output_scale, 0, output_zp, true, true, op, tensor);
+ std::vector<int32_t> op_tensor_multipliers;
+ std::vector<int32_t> op_tensor_shifts;
+ op_tensor_multipliers.reserve(weight_scales.size());
+ op_tensor_shifts.reserve(weight_scales.size());
+
+ for (const float& weight_scale : weight_scales)
+ {
+ double op_tensor_scale = (input_scale * weight_scale) / output_scale;
+ int32_t multiplier;
+ int32_t shift;
+
+ if (scale32)
+ {
+ ComputeMultiplierAndShiftTosaScale32(op_tensor_scale, multiplier, shift);
+ }
+ else
+ {
+ ComputeMultiplierAndShiftTosaScale16(op_tensor_scale, multiplier, shift);
+ }
+
+ op_tensor_multipliers.push_back(multiplier);
+ op_tensor_shifts.push_back(shift);
+ }
+
+ CreateRescaleTosaOperator(inputName, outputName, output_type, shape, op_tensor_multipliers, op_tensor_shifts,
+ input_zp, output_zp, double_round, scale32, true, op, tensor);
+}
+
+inline void CreateFromInt32RescaleTosaOperator(const std::string& inputName,
+ const std::string& outputName,
+ DType output_type,
+ const std::vector<int32_t>& shape,
+ double output_scale,
+ int32_t output_zp,
+ TosaSerializationOperator** op,
+ TosaSerializationTensor** tensor)
+{
+ CreateRescaleTosaOperator(inputName, outputName, output_type, shape, output_scale,
+ 0, output_zp, true, true, op, tensor);
}