src/backends/reference/workloads/ConvImpl.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71

//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ConvImpl.hpp"

#include <boost/assert.hpp>

#include <cmath>
#include <limits>

namespace armnn
{

QuantizedMultiplierSmallerThanOne::QuantizedMultiplierSmallerThanOne(float multiplier)
{
    BOOST_ASSERT(multiplier >= 0.0f && multiplier < 1.0f);
    if (multiplier == 0.0f)
    {
        m_Multiplier = 0;
        m_RightShift = 0;
    }
    else
    {
        const double q = std::frexp(multiplier, &m_RightShift);
        m_RightShift = -m_RightShift;
        int64_t qFixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
        BOOST_ASSERT(qFixed <= (1ll << 31));
        if (qFixed == (1ll << 31))
        {
            qFixed /= 2;
            --m_RightShift;
        }
        BOOST_ASSERT(m_RightShift >= 0);
        BOOST_ASSERT(qFixed <= std::numeric_limits<int32_t>::max());
        m_Multiplier = static_cast<int32_t>(qFixed);
    }
}

int32_t QuantizedMultiplierSmallerThanOne::operator*(int32_t rhs) const
{
    int32_t x = SaturatingRoundingDoublingHighMul(rhs, m_Multiplier);
    return RoundingDivideByPOT(x, m_RightShift);
}

int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
{
    // Check for overflow.
    if (a == b && a == std::numeric_limits<int32_t>::min())
    {
        return std::numeric_limits<int32_t>::max();
    }
    int64_t a_64(a);
    int64_t b_64(b);
    int64_t ab_64 = a_64 * b_64;
    int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
    int32_t ab_x2_high32 = static_cast<std::int32_t>((ab_64 + nudge) / (1ll << 31));
    return ab_x2_high32;
}

int32_t QuantizedMultiplierSmallerThanOne::RoundingDivideByPOT(int32_t x, int exponent)
{
    BOOST_ASSERT(exponent >= 0 && exponent <= 31);
    int32_t mask = (1 << exponent) - 1;
    int32_t remainder = x & mask;
    int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
    return (x >> exponent) + (remainder > threshold ? 1 : 0);
}

} //namespace armnn