// Fragment of the QuantizedMultiplierSmallerThanOne(float multiplier) constructor body.
// It converts a float multiplier into a 32-bit fixed-point value (m_Multiplier) plus a
// right-shift amount (m_RightShift). The signature and the bodies of both `if` branches
// are not part of this listing.
// Precondition: the multiplier must lie in [0, 1).
8 #include <boost/assert.hpp> 18 BOOST_ASSERT(multiplier >= 0.0f && multiplier < 1.0f);
// A multiplier of exactly zero is special-cased (branch body not shown here).
19 if (multiplier == 0.0f)
// std::frexp splits the multiplier into a fraction q and a binary exponent, which it
// writes into m_RightShift, so that multiplier == q * 2^m_RightShift at this point.
26 const double q = std::frexp(multiplier, &m_RightShift);
// Flip the sign so the exponent is stored as a right-shift amount.
27 m_RightShift = -m_RightShift;
// Scale the fraction by 2^31 and round to the nearest integer.
28 int64_t qFixed =
static_cast<int64_t
>(std::round(q * (1ll << 31)));
29 BOOST_ASSERT(qFixed <= (1ll << 31));
// Rounding can land exactly on 2^31; that case is handled by a branch whose body is not shown here.
30 if (qFixed == (1ll << 31))
// At this point the shift must be non-negative and the fixed-point value must fit in an int32_t.
35 BOOST_ASSERT(m_RightShift >= 0);
36 BOOST_ASSERT(qFixed <= std::numeric_limits<int32_t>::max());
37 m_Multiplier =
static_cast<int32_t
>(qFixed);
// Body fragment, presumably of operator*(int32_t rhs) (the signature is not part of this listing):
// multiply rhs by the stored fixed-point multiplier with SaturatingRoundingDoublingHighMul,
// then divide the result by 2^m_RightShift with RoundingDivideByPOT.
43 int32_t x = SaturatingRoundingDoublingHighMul(rhs, m_Multiplier);
44 return RoundingDivideByPOT(x, m_RightShift);
// Computes the rounded value of (a * b) / 2^31 as an int32_t.
// NOTE(review): the declarations of a_64 and b_64 and the final return statement are not part
// of this listing; a_64/b_64 are presumably 64-bit copies of a and b - confirm against the full file.
47 int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
// Special case: when both operands are the minimum int32_t value, return the maximum int32_t value.
50 if (a == b && a == std::numeric_limits<int32_t>::min())
52 return std::numeric_limits<int32_t>::max();
// The product is formed in 64 bits.
56 int64_t ab_64 = a_64 * b_64;
// Rounding offset: +2^30 for a non-negative product, 1 - 2^30 for a negative one.
57 int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
// Add the rounding offset, then divide by 2^31 and narrow to 32 bits.
58 int32_t ab_x2_high32 =
static_cast<std::int32_t
>((ab_64 + nudge) / (1ll << 31));
62 int32_t QuantizedMultiplierSmallerThanOne::RoundingDivideByPOT(int32_t x,
int exponent)
64 BOOST_ASSERT(exponent >= 0 && exponent <= 31);
65 int32_t mask = (1 << exponent) - 1;
66 int32_t remainder = x & mask;
67 int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
68 return (x >> exponent) + (remainder > threshold ? 1 : 0);
// Fragment of the Convolve(...) reference implementation. Braces, most of the signature, and the
// conditionals that choose between depthwise / data-layout variants are not part of this listing.
// Tail of the parameter list:
80 unsigned int paddingTop,
81 unsigned int paddingLeft,
84 unsigned int xDilation,
85 unsigned int yDilation,
// Guard: bias is enabled but no bias decoder was supplied (the body of this branch is not shown here).
88 if (biasEnabled && !pBiasDecoder)
// Positions of the height and width dimensions within a shape for the selected data layout.
95 const unsigned int heightIndex = dataLayoutIndexed.
GetHeightIndex();
96 const unsigned int widthIndex = dataLayoutIndexed.
GetWidthIndex();
// In depthwise mode the filter shape is read as [depthMultiplier, inputChannels, height, width];
// otherwise the output channel count is rFilterShape[0] and the other dimensions follow the data layout.
98 unsigned int depthMultiplier = depthwise ? rFilterShape[0] : 1;
99 unsigned int inputChannels = depthwise ? rFilterShape[1] : rFilterShape[channelsIndex];
100 unsigned int outputChannels = depthwise ? inputChannels * depthMultiplier : rFilterShape[0];
// Batch size and spatial extents of the output and input tensors.
102 unsigned int batchSize = rOutputShape[0];
103 unsigned int outputHeight = rOutputShape[heightIndex];
104 unsigned int outputWidth = rOutputShape[widthIndex];
105 unsigned int inputHeight = rInputShape[heightIndex];
106 unsigned int inputWidth = rInputShape[widthIndex];
// Spatial extents of the filter.
108 unsigned int filterHeight = depthwise ? rFilterShape[2] : rFilterShape[heightIndex];
109 unsigned int filterWidth = depthwise ? rFilterShape[3] : rFilterShape[widthIndex];
// Visit every output element: batch, output channel, output row, output column.
111 for (
unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
113 for (
unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
115 for (
unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
117 for (
unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
// Accumulate over input channels; in depthwise mode this loop runs exactly once.
124 for (
unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
126 unsigned int depthwiseMultiplierIdx = 0;
// The input channel and the multiplier index are derived from the output channel
// (presumably only when depthwise is set; the enclosing condition is not shown here).
129 cInput = cOutput / depthMultiplier;
130 depthwiseMultiplierIdx = cOutput % depthMultiplier;
// Walk the filter window.
133 for (
unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
135 for (
unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
// Flat index into the filter data. Three variants are visible below; the conditions selecting
// between them and the last term of each sum are not part of this listing.
138 unsigned int filterIndex = 0;
// Variant indexed by depthwiseMultiplierIdx, then input channel, then filter row.
143 filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels +
144 cInput * filterWidth * filterHeight +
145 yFilter * filterWidth +
// Variant indexed by output channel, then filter row, then filter column (scaled by inputChannels).
154 filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
155 yFilter * filterWidth * inputChannels +
156 xFilter * inputChannels +
// Variant indexed by output channel, then input channel, then filter row.
161 filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
162 cInput * filterWidth * filterHeight +
163 yFilter * filterWidth +
// Position the filter decoder (cOutput is passed as the axis index) and read the weight.
168 rFilterDecoder.
SetIndex(filterIndex, cOutput);
169 float filterValue = rFilterDecoder.
Get();
// Input coordinates that include the top/left padding offset.
171 unsigned int yInput = yOutput * yStride + yFilter * yDilation;
172 unsigned int xInput = xOutput * xStride + xFilter * xDilation;
// True when the coordinate falls outside the real input, i.e. inside the padding
// (the handling of that case is not shown in this listing).
177 if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
178 xInput < paddingLeft || xInput >= inputWidth + paddingLeft )
// Flat index into the input data, with the padding offset removed. Two variants are visible;
// the condition selecting between them is not part of this listing.
184 unsigned int inputIndex = 0;
// Variant indexed by batch, then row, then column (scaled by inputChannels); last term not shown.
190 inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
191 (yInput - paddingTop) * inputWidth * inputChannels +
192 (xInput - paddingLeft) * inputChannels +
// Variant indexed by batch, then channel, then row, then column.
197 inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
198 inputWidth * inputHeight * cInput +
199 inputWidth * (yInput - paddingTop) +
200 xInput - paddingLeft;
// Presumably positions the input decoder at inputIndex before reading - confirm operator[] semantics.
203 rInputDecoder[inputIndex];
204 inputValue = rInputDecoder.
Get();
// Accumulate the weighted input into the running sum for this output element.
207 sum += filterValue * inputValue;
// Add the bias for this output channel (presumably only when bias is enabled; condition not shown).
214 (*pBiasDecoder).SetIndex(cOutput, cOutput);
215 sum += pBiasDecoder->
Get();
// Compute the flat output index for the selected data layout and store the accumulated value.
218 unsigned int outIdx = dataLayoutIndexed.
GetIndex(rOutputShape, batchIdx, cOutput, yOutput, xOutput);
220 rOutputEncoder[outIdx];
221 rOutputEncoder.
Set(sum);
unsigned int GetHeightIndex() const
void Convolve(const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rFilterShape, Decoder< float > &rFilterDecoder, bool biasEnabled, Decoder< float > *pBiasDecoder, DataLayout dataLayout, unsigned int paddingTop, unsigned int paddingLeft, unsigned int xStride, unsigned int yStride, unsigned int xDilation, unsigned int yDilation, bool depthwise)
virtual BaseIterator & SetIndex(unsigned int index, unsigned int axisIndex=0)=0
unsigned int GetWidthIndex() const
unsigned int GetChannelsIndex() const
virtual IType Get() const =0
QuantizedMultiplierSmallerThanOne(float multiplier)
int32_t operator*(int32_t rhs) const
The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne.
virtual void Set(IType right)=0
unsigned int GetIndex(const armnn::TensorShape &shape, unsigned int batchIndex, unsigned int channelIndex, unsigned int heightIndex, unsigned int widthIndex) const