aboutsummaryrefslogtreecommitdiff
path: root/22.08/_ethosn_ref_conv_impl_8hpp_source.xhtml
blob: e8391aacd614f27e2bec9327e65c2ff53ec10b91 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
<!-- Copyright (c) 2020 ARM Limited. -->
<!--                                 -->
<!-- SPDX-License-Identifier: MIT    -->
<!--                                 -->
<!-- HTML header for doxygen 1.8.13-->
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen 1.8.13"/>
<meta name="robots" content="NOINDEX, NOFOLLOW" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<title>ArmNN: src/backends/ethosnref/workloads/EthosnRefConvImpl.hpp Source File</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="navtree.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="resize.js"></script>
<script type="text/javascript" src="navtreedata.js"></script>
<script type="text/javascript" src="navtree.js"></script>
<script type="text/javascript">
  $(document).ready(initResizable);
</script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/searchdata.js"></script>
<script type="text/javascript" src="search/search.js"></script>
<script type="text/x-mathjax-config">
  MathJax.Hub.Config({
    extensions: ["tex2jax.js"],
    jax: ["input/TeX","output/HTML-CSS"],
});
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
<link href="stylesheet.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
 <tbody>
 <tr style="height: 56px;">
  <img alt="ArmNN" src="Arm_NN_horizontal_blue.png" style="max-width: 10rem; margin-top: .5rem; margin-left: 10px"/>
  <td style="padding-left: 0.5em;">
   <div id="projectname">
   &#160;<span id="projectnumber">22.08</span>
   </div>
  </td>
 </tr>
 </tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.8.13 -->
<script type="text/javascript">
var searchBox = new SearchBox("searchBox", "search",false,'Search');
</script>
<script type="text/javascript" src="menudata.js"></script>
<script type="text/javascript" src="menu.js"></script>
<script type="text/javascript">
$(function() {
  initMenu('',true,false,'search.php','Search');
  $(document).ready(function() { init_search(); });
});
</script>
<div id="main-nav"></div>
</div><!-- top -->
<div id="side-nav" class="ui-resizable side-nav-resizable">
  <div id="nav-tree">
    <div id="nav-tree-contents">
      <div id="nav-sync" class="sync"></div>
    </div>
  </div>
  <div id="splitbar" style="-moz-user-select:none;" 
       class="ui-resizable-handle">
  </div>
</div>
<script type="text/javascript">
$(document).ready(function(){initNavTree('_ethosn_ref_conv_impl_8hpp_source.xhtml','');});
</script>
<div id="doc-content">
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
     onmouseover="return searchBox.OnSearchSelectShow()"
     onmouseout="return searchBox.OnSearchSelectHide()"
     onkeydown="return searchBox.OnSearchSelectKey(event)">
</div>

<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0" 
        name="MSearchResults" id="MSearchResults">
</iframe>
</div>

<div class="header">
  <div class="headertitle">
<div class="title">EthosnRefConvImpl.hpp</div>  </div>
</div><!--header-->
<div class="contents">
<a href="_ethosn_ref_conv_impl_8hpp.xhtml">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">//</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment">// Copyright © 2017 Arm Ltd. All rights reserved.</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment">// SPDX-License-Identifier: MIT</span></div><div class="line"><a name="l00004"></a><span class="lineno">    4</span>&#160;<span class="comment">//</span></div><div class="line"><a name="l00005"></a><span class="lineno">    5</span>&#160;</div><div class="line"><a name="l00006"></a><span class="lineno">    6</span>&#160;<span class="preprocessor">#pragma once</span></div><div class="line"><a name="l00007"></a><span class="lineno">    7</span>&#160;</div><div class="line"><a name="l00008"></a><span class="lineno">    8</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="_ethosn_ref_workload_utils_8hpp.xhtml">EthosnRefWorkloadUtils.hpp</a>&quot;</span></div><div class="line"><a name="l00009"></a><span class="lineno">    9</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="ethosnref_2workloads_2_tensor_buffer_array_view_8hpp.xhtml">TensorBufferArrayView.hpp</a>&quot;</span></div><div class="line"><a name="l00010"></a><span class="lineno">   10</span>&#160;</div><div class="line"><a name="l00011"></a><span class="lineno">   11</span>&#160;<span class="preprocessor">#include &lt;<a class="code" href="_tensor_8hpp.xhtml">armnn/Tensor.hpp</a>&gt;</span></div><div class="line"><a name="l00012"></a><span class="lineno">   12</span>&#160;</div><div class="line"><a name="l00013"></a><span class="lineno">   13</span>&#160;<span class="preprocessor">#include &lt;<a class="code" 
href="_data_layout_indexed_8hpp.xhtml">armnnUtils/DataLayoutIndexed.hpp</a>&gt;</span></div><div class="line"><a name="l00014"></a><span class="lineno">   14</span>&#160;</div><div class="line"><a name="l00015"></a><span class="lineno">   15</span>&#160;<span class="preprocessor">#include &lt;<a class="code" href="_numeric_cast_8hpp.xhtml">armnn/utility/NumericCast.hpp</a>&gt;</span></div><div class="line"><a name="l00016"></a><span class="lineno">   16</span>&#160;</div><div class="line"><a name="l00017"></a><span class="lineno">   17</span>&#160;<span class="preprocessor">#include &lt;cmath&gt;</span></div><div class="line"><a name="l00018"></a><span class="lineno">   18</span>&#160;<span class="preprocessor">#include &lt;limits&gt;</span></div><div class="line"><a name="l00019"></a><span class="lineno">   19</span>&#160;</div><div class="line"><a name="l00020"></a><span class="lineno">   20</span>&#160;<span class="keyword">namespace </span><a class="code" href="namespacearmnn.xhtml">armnn</a></div><div class="line"><a name="l00021"></a><span class="lineno">   21</span>&#160;{</div><div class="line"><a name="l00022"></a><span class="lineno">   22</span>&#160;<span class="comment"></span></div><div class="line"><a name="l00023"></a><span class="lineno">   23</span>&#160;<span class="comment">/// Performs multiplication of an integer with a multiplier which is less than one,</span></div><div class="line"><a name="l00024"></a><span class="lineno">   24</span>&#160;<span class="comment">/// using quantized integer arithmetic which is consistent with AndroidNN&#39;s CPU executor.</span></div><div class="line"><a name="l00025"></a><span class="lineno"><a class="line" href="structarmnn_1_1_ethosn_ref_quantized_multiplier_smaller_than_one.xhtml">   25</a></span>&#160;<span class="comment"></span><span class="keyword">struct </span><a class="code" 
href="structarmnn_1_1_ethosn_ref_quantized_multiplier_smaller_than_one.xhtml">EthosnRefQuantizedMultiplierSmallerThanOne</a></div><div class="line"><a name="l00026"></a><span class="lineno">   26</span>&#160;{</div><div class="line"><a name="l00027"></a><span class="lineno">   27</span>&#160;<span class="keyword">public</span>:<span class="comment"></span></div><div class="line"><a name="l00028"></a><span class="lineno">   28</span>&#160;<span class="comment">    /// Constructs a EthosnRefQuantizedMultiplierSmallerThanOne which will multiply by the given multiplier.</span></div><div class="line"><a name="l00029"></a><span class="lineno">   29</span>&#160;<span class="comment">    /// This stores the appropriate integer quantities (derived from the given multiplier) for later use.</span></div><div class="line"><a name="l00030"></a><span class="lineno">   30</span>&#160;<span class="comment">    /// The implementation of this function is adapted from Android NN&#39;s QuantizeMultiplierSmallerThanOne().</span></div><div class="line"><a name="l00031"></a><span class="lineno">   31</span>&#160;<span class="comment"></span>    <a class="code" href="structarmnn_1_1_ethosn_ref_quantized_multiplier_smaller_than_one.xhtml#abcc6c336d5ed521b1d7c0e7b86a28176">EthosnRefQuantizedMultiplierSmallerThanOne</a>(<span class="keywordtype">float</span> multiplier);</div><div class="line"><a name="l00032"></a><span class="lineno">   32</span>&#160;<span class="comment"></span></div><div class="line"><a name="l00033"></a><span class="lineno">   33</span>&#160;<span class="comment">    /// The implementation of this function is adapted from Android NN&#39;s MultiplyByEthosnRefQuantizedMultiplierSmallerThanOne().</span></div><div class="line"><a name="l00034"></a><span class="lineno">   34</span>&#160;<span class="comment"></span>    int32_t <a class="code" href="structarmnn_1_1_ethosn_ref_quantized_multiplier_smaller_than_one.xhtml#ac04834893e568dfea4642ecb575faed8">operator*</a>(int32_t 
rhs) <span class="keyword">const</span>;</div><div class="line"><a name="l00035"></a><span class="lineno">   35</span>&#160;    int64_t <a class="code" href="structarmnn_1_1_ethosn_ref_quantized_multiplier_smaller_than_one.xhtml#ac04834893e568dfea4642ecb575faed8">operator*</a>(int64_t rhs) <span class="keyword">const</span>;</div><div class="line"><a name="l00036"></a><span class="lineno">   36</span>&#160;</div><div class="line"><a name="l00037"></a><span class="lineno">   37</span>&#160;<span class="keyword">private</span>:</div><div class="line"><a name="l00038"></a><span class="lineno">   38</span>&#160;    int32_t m_Multiplier;</div><div class="line"><a name="l00039"></a><span class="lineno">   39</span>&#160;    int32_t m_RightShift;</div><div class="line"><a name="l00040"></a><span class="lineno">   40</span>&#160;};</div><div class="line"><a name="l00041"></a><span class="lineno">   41</span>&#160;<span class="comment"></span></div><div class="line"><a name="l00042"></a><span class="lineno">   42</span>&#160;<span class="comment">/// An implementation shared by normal and depthwise convolution.</span></div><div class="line"><a name="l00043"></a><span class="lineno">   43</span>&#160;<span class="comment"></span><span class="keyword">template</span>&lt;<span class="keyword">typename</span> ConvData, <span class="keyword">typename</span> InputType, <span class="keyword">typename</span> WeightType, <span class="keyword">typename</span> BiasType, <span class="keyword">typename</span> AccumulatorType&gt;</div><div class="line"><a name="l00044"></a><span class="lineno">   44</span>&#160;<span class="keyword">static</span> <span class="keywordtype">void</span> EthosnRefConvImpl(ConvData data,</div><div class="line"><a name="l00045"></a><span class="lineno">   45</span>&#160;                     <span class="keyword">const</span> InputType* inputData,</div><div class="line"><a name="l00046"></a><span class="lineno">   46</span>&#160;                     <span 
class="keywordtype">float</span> inputScale,</div><div class="line"><a name="l00047"></a><span class="lineno">   47</span>&#160;                     int32_t inputOffset,</div><div class="line"><a name="l00048"></a><span class="lineno">   48</span>&#160;                     <span class="keyword">const</span> WeightType* filterData,</div><div class="line"><a name="l00049"></a><span class="lineno">   49</span>&#160;                     <span class="keywordtype">float</span> filterScale,</div><div class="line"><a name="l00050"></a><span class="lineno">   50</span>&#160;                     int32_t filterOffset,</div><div class="line"><a name="l00051"></a><span class="lineno">   51</span>&#160;                     <span class="keyword">const</span> BiasType* biasData,</div><div class="line"><a name="l00052"></a><span class="lineno">   52</span>&#160;                     <span class="keywordtype">float</span> outputScale,</div><div class="line"><a name="l00053"></a><span class="lineno">   53</span>&#160;                     int32_t outputOffset,</div><div class="line"><a name="l00054"></a><span class="lineno">   54</span>&#160;                     <span class="keyword">const</span> <a class="code" href="classarmnn_1_1_tensor_info.xhtml">TensorInfo</a>&amp; filterInfo,</div><div class="line"><a name="l00055"></a><span class="lineno">   55</span>&#160;                     <span class="keywordtype">bool</span> depthwise = <span class="keyword">false</span>)</div><div class="line"><a name="l00056"></a><span class="lineno">   56</span>&#160;{</div><div class="line"><a name="l00057"></a><span class="lineno">   57</span>&#160;    <span class="keywordflow">if</span> (data.m_Parameters.m_BiasEnabled &amp;&amp; !biasData)</div><div class="line"><a name="l00058"></a><span class="lineno">   58</span>&#160;    {</div><div class="line"><a name="l00059"></a><span class="lineno">   59</span>&#160;        <span class="keywordflow">throw</span> <a class="code" 
href="classarmnn_1_1_invalid_argument_exception.xhtml">InvalidArgumentException</a>(<span class="stringliteral">&quot;Bias is enabled but the bias data is invalid&quot;</span>);</div><div class="line"><a name="l00060"></a><span class="lineno">   60</span>&#160;    }</div><div class="line"><a name="l00061"></a><span class="lineno">   61</span>&#160;</div><div class="line"><a name="l00062"></a><span class="lineno">   62</span>&#160;    <span class="keyword">const</span> <a class="code" href="classarmnn_1_1_tensor_info.xhtml">TensorInfo</a>&amp; inputInfo  = <a class="code" href="namespacearmnn_1_1ethosnref.xhtml#a0781fc227cbcb3e82fd137bc0f6638cd">armnn::ethosnref::GetTensorInfo</a>(data.m_Inputs[0]);</div><div class="line"><a name="l00063"></a><span class="lineno">   63</span>&#160;    <span class="keyword">const</span> <a class="code" href="classarmnn_1_1_tensor_info.xhtml">TensorInfo</a>&amp; outputInfo = <a class="code" href="namespacearmnn_1_1ethosnref.xhtml#a0781fc227cbcb3e82fd137bc0f6638cd">armnn::ethosnref::GetTensorInfo</a>(data.m_Outputs[0]);</div><div class="line"><a name="l00064"></a><span class="lineno">   64</span>&#160;</div><div class="line"><a name="l00065"></a><span class="lineno">   65</span>&#160;    <a class="code" href="classarmnn_1_1_tensor_buffer_array_view.xhtml">TensorBufferArrayView&lt;InputType&gt;</a> output(outputInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>(),</div><div class="line"><a name="l00066"></a><span class="lineno">   66</span>&#160;                                            armnn::ethosnref::GetOutputTensorData&lt;InputType&gt;(0, data),</div><div class="line"><a name="l00067"></a><span class="lineno">   67</span>&#160;                                            data.m_Parameters.m_DataLayout);</div><div class="line"><a name="l00068"></a><span class="lineno">   68</span>&#160;</div><div class="line"><a name="l00069"></a><span class="lineno">   69</span>&#160;    
<span class="keyword">const</span> <a class="code" href="classarmnn_utils_1_1_data_layout_indexed.xhtml">armnnUtils::DataLayoutIndexed</a> dataLayoutIndexed(data.m_Parameters.m_DataLayout);</div><div class="line"><a name="l00070"></a><span class="lineno">   70</span>&#160;</div><div class="line"><a name="l00071"></a><span class="lineno">   71</span>&#160;    <span class="keyword">const</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> channelsIndex = dataLayoutIndexed.<a class="code" href="classarmnn_utils_1_1_data_layout_indexed.xhtml#a861b2621ee46e4b63379988b360b8cd9">GetChannelsIndex</a>();</div><div class="line"><a name="l00072"></a><span class="lineno">   72</span>&#160;    <span class="keyword">const</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> heightIndex   = dataLayoutIndexed.<a class="code" href="classarmnn_utils_1_1_data_layout_indexed.xhtml#a61c00316c443adc233c24e85c6c5b740">GetHeightIndex</a>();</div><div class="line"><a name="l00073"></a><span class="lineno">   73</span>&#160;    <span class="keyword">const</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> widthIndex    = dataLayoutIndexed.<a class="code" href="classarmnn_utils_1_1_data_layout_indexed.xhtml#a414e6f95548e6f7a01d5028b55ad3941">GetWidthIndex</a>();</div><div class="line"><a name="l00074"></a><span class="lineno">   74</span>&#160;</div><div class="line"><a name="l00075"></a><span class="lineno">   75</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> depthMultiplier = depthwise ? filterInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[0] : 1;</div><div class="line"><a name="l00076"></a><span class="lineno">   76</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> inputChannels   = depthwise ? 
filterInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[1] : filterInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[channelsIndex];</div><div class="line"><a name="l00077"></a><span class="lineno">   77</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> outputChannels  = depthwise ? inputChannels * depthMultiplier : filterInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[0];</div><div class="line"><a name="l00078"></a><span class="lineno">   78</span>&#160;</div><div class="line"><a name="l00079"></a><span class="lineno">   79</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> batchSize    = outputInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[0];</div><div class="line"><a name="l00080"></a><span class="lineno">   80</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> outputHeight = outputInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[heightIndex];</div><div class="line"><a name="l00081"></a><span class="lineno">   81</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> outputWidth  = outputInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[widthIndex];</div><div class="line"><a name="l00082"></a><span class="lineno">   82</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> inputHeight  = inputInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[heightIndex];</div><div class="line"><a name="l00083"></a><span class="lineno">   
83</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> inputWidth   = inputInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[widthIndex];</div><div class="line"><a name="l00084"></a><span class="lineno">   84</span>&#160;</div><div class="line"><a name="l00085"></a><span class="lineno">   85</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> filterHeight = depthwise ? filterInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[2] : filterInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[heightIndex];</div><div class="line"><a name="l00086"></a><span class="lineno">   86</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> filterWidth  = depthwise ? filterInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[3] : filterInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[widthIndex];</div><div class="line"><a name="l00087"></a><span class="lineno">   87</span>&#160;</div><div class="line"><a name="l00088"></a><span class="lineno">   88</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> paddingTop  = data.m_Parameters.m_PadTop;</div><div class="line"><a name="l00089"></a><span class="lineno">   89</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> paddingLeft = data.m_Parameters.m_PadLeft;</div><div class="line"><a name="l00090"></a><span class="lineno">   90</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> xStride     = data.m_Parameters.m_StrideX;</div><div class="line"><a name="l00091"></a><span class="lineno">   
91</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> yStride     = data.m_Parameters.m_StrideY;</div><div class="line"><a name="l00092"></a><span class="lineno">   92</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> xDilation   = data.m_Parameters.m_DilationX;</div><div class="line"><a name="l00093"></a><span class="lineno">   93</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> yDilation   = data.m_Parameters.m_DilationY;</div><div class="line"><a name="l00094"></a><span class="lineno">   94</span>&#160;</div><div class="line"><a name="l00095"></a><span class="lineno">   95</span>&#160;    <span class="comment">// The world&#39;s least efficient convolution.</span></div><div class="line"><a name="l00096"></a><span class="lineno">   96</span>&#160;    <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> batchIdx = 0; batchIdx &lt; batchSize; batchIdx++)</div><div class="line"><a name="l00097"></a><span class="lineno">   97</span>&#160;    {</div><div class="line"><a name="l00098"></a><span class="lineno">   98</span>&#160;        <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> cOutput = 0; cOutput &lt; outputChannels; cOutput++)</div><div class="line"><a name="l00099"></a><span class="lineno">   99</span>&#160;        {</div><div class="line"><a name="l00100"></a><span class="lineno">  100</span>&#160;            <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> yOutput = 0; yOutput &lt; outputHeight; yOutput++)</div><div class="line"><a name="l00101"></a><span class="lineno">  101</span>&#160;            {</div><div class="line"><a name="l00102"></a><span class="lineno">  102</span>&#160;                <span 
class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> xOutput = 0; xOutput &lt; outputWidth; xOutput++)</div><div class="line"><a name="l00103"></a><span class="lineno">  103</span>&#160;                {</div><div class="line"><a name="l00104"></a><span class="lineno">  104</span>&#160;                    <span class="comment">// This loop goes over each output element.</span></div><div class="line"><a name="l00105"></a><span class="lineno">  105</span>&#160;                    AccumulatorType sum = AccumulatorType();</div><div class="line"><a name="l00106"></a><span class="lineno">  106</span>&#160;</div><div class="line"><a name="l00107"></a><span class="lineno">  107</span>&#160;                    <span class="comment">// For depthwise, each output channel corresponds to exactly one input channel.</span></div><div class="line"><a name="l00108"></a><span class="lineno">  108</span>&#160;                    <span class="comment">// For normal, must loop over each input channel.</span></div><div class="line"><a name="l00109"></a><span class="lineno">  109</span>&#160;                    <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> cInput = 0; cInput &lt; (depthwise ? 
1 : inputChannels); cInput++)</div><div class="line"><a name="l00110"></a><span class="lineno">  110</span>&#160;                    {</div><div class="line"><a name="l00111"></a><span class="lineno">  111</span>&#160;                        <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> depthwiseMultiplierIdx = 0;</div><div class="line"><a name="l00112"></a><span class="lineno">  112</span>&#160;                        <span class="keywordflow">if</span> (depthwise)</div><div class="line"><a name="l00113"></a><span class="lineno">  113</span>&#160;                        {</div><div class="line"><a name="l00114"></a><span class="lineno">  114</span>&#160;                            cInput = cOutput / depthMultiplier;</div><div class="line"><a name="l00115"></a><span class="lineno">  115</span>&#160;                            depthwiseMultiplierIdx = cOutput % depthMultiplier;</div><div class="line"><a name="l00116"></a><span class="lineno">  116</span>&#160;                        }</div><div class="line"><a name="l00117"></a><span class="lineno">  117</span>&#160;</div><div class="line"><a name="l00118"></a><span class="lineno">  118</span>&#160;                        <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> yFilter = 0; yFilter &lt; filterHeight; yFilter++)</div><div class="line"><a name="l00119"></a><span class="lineno">  119</span>&#160;                        {</div><div class="line"><a name="l00120"></a><span class="lineno">  120</span>&#160;                            <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> xFilter = 0; xFilter &lt; filterWidth; xFilter++)</div><div class="line"><a name="l00121"></a><span class="lineno">  121</span>&#160;                            {</div><div class="line"><a name="l00122"></a><span class="lineno">  122</span>&#160;                          
      <span class="comment">// This loop goes over each input element for each output element.</span></div><div class="line"><a name="l00123"></a><span class="lineno">  123</span>&#160;</div><div class="line"><a name="l00124"></a><span class="lineno">  124</span>&#160;                                <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> filterIndex = 0;</div><div class="line"><a name="l00125"></a><span class="lineno">  125</span>&#160;</div><div class="line"><a name="l00126"></a><span class="lineno">  126</span>&#160;                                <span class="comment">// Since dimensionality of kernel depends on depthwiseness, so does index.</span></div><div class="line"><a name="l00127"></a><span class="lineno">  127</span>&#160;                                <span class="keywordflow">if</span> (depthwise)</div><div class="line"><a name="l00128"></a><span class="lineno">  128</span>&#160;                                {</div><div class="line"><a name="l00129"></a><span class="lineno">  129</span>&#160;                                    filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels +</div><div class="line"><a name="l00130"></a><span class="lineno">  130</span>&#160;                                                  cInput * filterWidth * filterHeight +</div><div class="line"><a name="l00131"></a><span class="lineno">  131</span>&#160;                                                  yFilter * filterWidth +</div><div class="line"><a name="l00132"></a><span class="lineno">  132</span>&#160;                                                  xFilter;</div><div class="line"><a name="l00133"></a><span class="lineno">  133</span>&#160;                                }</div><div class="line"><a name="l00134"></a><span class="lineno">  134</span>&#160;                                <span class="keywordflow">else</span></div><div class="line"><a name="l00135"></a><span class="lineno">  
135</span>&#160;                                {</div><div class="line"><a name="l00136"></a><span class="lineno">  136</span>&#160;                                    <span class="keywordflow">if</span> (data.m_Parameters.m_DataLayout == <a class="code" href="namespacearmnn.xhtml#ad1d5cce2d9e9a5d61c243e5c989112e0ad066db54b89b0912e7e7c6da51e2da51">DataLayout::NHWC</a>)</div><div class="line"><a name="l00137"></a><span class="lineno">  137</span>&#160;                                    {</div><div class="line"><a name="l00138"></a><span class="lineno">  138</span>&#160;                                        filterIndex = cOutput * filterHeight * filterWidth * inputChannels +</div><div class="line"><a name="l00139"></a><span class="lineno">  139</span>&#160;                                                      yFilter * filterWidth * inputChannels +</div><div class="line"><a name="l00140"></a><span class="lineno">  140</span>&#160;                                                      xFilter * inputChannels +</div><div class="line"><a name="l00141"></a><span class="lineno">  141</span>&#160;                                                      cInput;</div><div class="line"><a name="l00142"></a><span class="lineno">  142</span>&#160;                                    }</div><div class="line"><a name="l00143"></a><span class="lineno">  143</span>&#160;                                    <span class="keywordflow">else</span></div><div class="line"><a name="l00144"></a><span class="lineno">  144</span>&#160;                                    {</div><div class="line"><a name="l00145"></a><span class="lineno">  145</span>&#160;                                        filterIndex = cOutput * filterWidth * filterHeight * inputChannels +</div><div class="line"><a name="l00146"></a><span class="lineno">  146</span>&#160;                                                      cInput  * filterWidth * filterHeight +</div><div class="line"><a name="l00147"></a><span 
class="lineno">  147</span>&#160;                                                      yFilter * filterWidth +</div><div class="line"><a name="l00148"></a><span class="lineno">  148</span>&#160;                                                      xFilter;</div><div class="line"><a name="l00149"></a><span class="lineno">  149</span>&#160;                                    }</div><div class="line"><a name="l00150"></a><span class="lineno">  150</span>&#160;                                }</div><div class="line"><a name="l00151"></a><span class="lineno">  151</span>&#160;</div><div class="line"><a name="l00152"></a><span class="lineno">  152</span>&#160;                                AccumulatorType filterValue = filterData[filterIndex] -</div><div class="line"><a name="l00153"></a><span class="lineno">  153</span>&#160;                                    <a class="code" href="namespacearmnn.xhtml#a375ca3cff9f1b005d1412dc5f3cf5b6e">numeric_cast</a>&lt;AccumulatorType&gt;(filterOffset);</div><div class="line"><a name="l00154"></a><span class="lineno">  154</span>&#160;</div><div class="line"><a name="l00155"></a><span class="lineno">  155</span>&#160;                                <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> yInput = yOutput * yStride + yFilter * yDilation;</div><div class="line"><a name="l00156"></a><span class="lineno">  156</span>&#160;                                <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> xInput = xOutput * xStride + xFilter * xDilation;</div><div class="line"><a name="l00157"></a><span class="lineno">  157</span>&#160;</div><div class="line"><a name="l00158"></a><span class="lineno">  158</span>&#160;                                AccumulatorType inputValue;</div><div class="line"><a name="l00159"></a><span class="lineno">  159</span>&#160;</div><div class="line"><a name="l00160"></a><span class="lineno">  160</span>&#160;                                
<span class="comment">// Check if we&#39;re in the padding.</span></div><div class="line"><a name="l00161"></a><span class="lineno">  161</span>&#160;                                <span class="keywordflow">if</span> (yInput &lt; paddingTop || yInput &gt;= inputHeight + paddingTop ||</div><div class="line"><a name="l00162"></a><span class="lineno">  162</span>&#160;                                    xInput &lt; paddingLeft || xInput &gt;= inputWidth + paddingLeft )</div><div class="line"><a name="l00163"></a><span class="lineno">  163</span>&#160;                                {</div><div class="line"><a name="l00164"></a><span class="lineno">  164</span>&#160;                                    inputValue = AccumulatorType();</div><div class="line"><a name="l00165"></a><span class="lineno">  165</span>&#160;                                }</div><div class="line"><a name="l00166"></a><span class="lineno">  166</span>&#160;                                <span class="keywordflow">else</span></div><div class="line"><a name="l00167"></a><span class="lineno">  167</span>&#160;                                {</div><div class="line"><a name="l00168"></a><span class="lineno">  168</span>&#160;                                    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> inputIndex;</div><div class="line"><a name="l00169"></a><span class="lineno">  169</span>&#160;</div><div class="line"><a name="l00170"></a><span class="lineno">  170</span>&#160;                                    <span class="keywordflow">if</span> (data.m_Parameters.m_DataLayout == <a class="code" href="namespacearmnn.xhtml#ad1d5cce2d9e9a5d61c243e5c989112e0ad066db54b89b0912e7e7c6da51e2da51">DataLayout::NHWC</a>)</div><div class="line"><a name="l00171"></a><span class="lineno">  171</span>&#160;                                    {</div><div class="line"><a name="l00172"></a><span class="lineno">  172</span>&#160;                                        inputIndex 
= batchIdx * inputHeight * inputWidth  * inputChannels +</div><div class="line"><a name="l00173"></a><span class="lineno">  173</span>&#160;                                                     (yInput - paddingTop) * inputWidth * inputChannels +</div><div class="line"><a name="l00174"></a><span class="lineno">  174</span>&#160;                                                     (xInput - paddingLeft) * inputChannels +</div><div class="line"><a name="l00175"></a><span class="lineno">  175</span>&#160;                                                     cInput;</div><div class="line"><a name="l00176"></a><span class="lineno">  176</span>&#160;</div><div class="line"><a name="l00177"></a><span class="lineno">  177</span>&#160;                                    }</div><div class="line"><a name="l00178"></a><span class="lineno">  178</span>&#160;                                    <span class="keywordflow">else</span></div><div class="line"><a name="l00179"></a><span class="lineno">  179</span>&#160;                                    {</div><div class="line"><a name="l00180"></a><span class="lineno">  180</span>&#160;                                        inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +</div><div class="line"><a name="l00181"></a><span class="lineno">  181</span>&#160;                                                     inputWidth * inputHeight * cInput +</div><div class="line"><a name="l00182"></a><span class="lineno">  182</span>&#160;                                                     inputWidth * (yInput - paddingTop) +</div><div class="line"><a name="l00183"></a><span class="lineno">  183</span>&#160;                                                     xInput - paddingLeft;</div><div class="line"><a name="l00184"></a><span class="lineno">  184</span>&#160;                                    }</div><div class="line"><a name="l00185"></a><span class="lineno">  185</span>&#160;</div><div class="line"><a name="l00186"></a><span 
class="lineno">  186</span>&#160;                                    inputValue = inputData[inputIndex] -</div><div class="line"><a name="l00187"></a><span class="lineno">  187</span>&#160;                                                    <a class="code" href="namespacearmnn.xhtml#a375ca3cff9f1b005d1412dc5f3cf5b6e">numeric_cast</a>&lt;AccumulatorType&gt;(inputOffset);</div><div class="line"><a name="l00188"></a><span class="lineno">  188</span>&#160;</div><div class="line"><a name="l00189"></a><span class="lineno">  189</span>&#160;                                }</div><div class="line"><a name="l00190"></a><span class="lineno">  190</span>&#160;                                sum += filterValue * inputValue;</div><div class="line"><a name="l00191"></a><span class="lineno">  191</span>&#160;                            }</div><div class="line"><a name="l00192"></a><span class="lineno">  192</span>&#160;                        }</div><div class="line"><a name="l00193"></a><span class="lineno">  193</span>&#160;                    }</div><div class="line"><a name="l00194"></a><span class="lineno">  194</span>&#160;</div><div class="line"><a name="l00195"></a><span class="lineno">  195</span>&#160;                    <span class="keywordflow">if</span> (data.m_Parameters.m_BiasEnabled)</div><div class="line"><a name="l00196"></a><span class="lineno">  196</span>&#160;                    {</div><div class="line"><a name="l00197"></a><span class="lineno">  197</span>&#160;                        sum += biasData[cOutput];</div><div class="line"><a name="l00198"></a><span class="lineno">  198</span>&#160;                    }</div><div class="line"><a name="l00199"></a><span class="lineno">  199</span>&#160;</div><div class="line"><a name="l00200"></a><span class="lineno">  200</span>&#160;                    <span class="keywordflow">if</span> (outputScale != 0.0f)</div><div class="line"><a name="l00201"></a><span class="lineno">  201</span>&#160;                    
{</div><div class="line"><a name="l00202"></a><span class="lineno">  202</span>&#160;                        <span class="keywordtype">float</span> multiplier = (inputScale * filterScale) / outputScale;</div><div class="line"><a name="l00203"></a><span class="lineno">  203</span>&#160;</div><div class="line"><a name="l00204"></a><span class="lineno">  204</span>&#160;                        sum = <a class="code" href="namespacearmnn.xhtml#a375ca3cff9f1b005d1412dc5f3cf5b6e">numeric_cast</a>&lt;AccumulatorType&gt;(</div><div class="line"><a name="l00205"></a><span class="lineno">  205</span>&#160;                                <a class="code" href="structarmnn_1_1_ethosn_ref_quantized_multiplier_smaller_than_one.xhtml#abcc6c336d5ed521b1d7c0e7b86a28176">EthosnRefQuantizedMultiplierSmallerThanOne</a>(multiplier) * sum)</div><div class="line"><a name="l00206"></a><span class="lineno">  206</span>&#160;                            + numeric_cast&lt;AccumulatorType&gt;(outputOffset);</div><div class="line"><a name="l00207"></a><span class="lineno">  207</span>&#160;                        sum = std::min&lt;AccumulatorType&gt;(std::max&lt;AccumulatorType&gt;(sum, std::numeric_limits&lt;InputType&gt;::min()),</div><div class="line"><a name="l00208"></a><span class="lineno">  208</span>&#160;                                std::numeric_limits&lt;InputType&gt;::max());</div><div class="line"><a name="l00209"></a><span class="lineno">  209</span>&#160;                    }</div><div class="line"><a name="l00210"></a><span class="lineno">  210</span>&#160;</div><div class="line"><a name="l00211"></a><span class="lineno">  211</span>&#160;                    output.<a class="code" href="classarmnn_1_1_tensor_buffer_array_view.xhtml#ab3a2cf851173535ea07b9d87eaf3ca01">Get</a>(batchIdx, cOutput, yOutput, xOutput) = <a class="code" href="namespacearmnn.xhtml#a375ca3cff9f1b005d1412dc5f3cf5b6e">numeric_cast</a>&lt;InputType&gt;(sum);</div><div class="line"><a name="l00212"></a><span 
class="lineno">  212</span>&#160;                }</div><div class="line"><a name="l00213"></a><span class="lineno">  213</span>&#160;            }</div><div class="line"><a name="l00214"></a><span class="lineno">  214</span>&#160;        }</div><div class="line"><a name="l00215"></a><span class="lineno">  215</span>&#160;    }</div><div class="line"><a name="l00216"></a><span class="lineno">  216</span>&#160;}</div><div class="line"><a name="l00217"></a><span class="lineno">  217</span>&#160;</div><div class="line"><a name="l00218"></a><span class="lineno"><a class="line" href="structarmnn_1_1hw__float.xhtml">  218</a></span>&#160;<span class="keyword">struct </span><a class="code" href="structarmnn_1_1hw__float.xhtml">hw_float</a></div><div class="line"><a name="l00219"></a><span class="lineno">  219</span>&#160;{</div><div class="line"><a name="l00220"></a><span class="lineno"><a class="line" href="structarmnn_1_1hw__float.xhtml#ac791945aa5d385872f11a441e29fe786">  220</a></span>&#160;    <span class="keywordtype">bool</span>        <a class="code" href="structarmnn_1_1hw__float.xhtml#ac791945aa5d385872f11a441e29fe786">sign</a>;</div><div class="line"><a name="l00221"></a><span class="lineno"><a class="line" href="structarmnn_1_1hw__float.xhtml#a81f962a76560c6bef5d04b53b071e6ef">  221</a></span>&#160;    uint32_t    <a class="code" href="structarmnn_1_1hw__float.xhtml#a81f962a76560c6bef5d04b53b071e6ef">man</a>;</div><div class="line"><a name="l00222"></a><span class="lineno"><a class="line" href="structarmnn_1_1hw__float.xhtml#abd69797ed098e8001c460f9bd34ec7b6">  222</a></span>&#160;    uint32_t    <a class="code" href="structarmnn_1_1hw__float.xhtml#abd69797ed098e8001c460f9bd34ec7b6">exp</a>;</div><div class="line"><a name="l00223"></a><span class="lineno">  223</span>&#160;};</div><div class="line"><a name="l00224"></a><span class="lineno">  224</span>&#160;</div><div class="line"><a name="l00225"></a><span class="lineno"><a class="line" 
href="namespacearmnn.xhtml#aa7dfb0fb6a458a92ec5bb985aea1daae">  225</a></span>&#160;<span class="keyword">inline</span> uint32_t <a class="code" href="namespacearmnn.xhtml#aa7dfb0fb6a458a92ec5bb985aea1daae">one_mask</a>(<span class="keyword">const</span> uint8_t size)</div><div class="line"><a name="l00226"></a><span class="lineno">  226</span>&#160;{</div><div class="line"><a name="l00227"></a><span class="lineno">  227</span>&#160;    uint32_t mask = 0;</div><div class="line"><a name="l00228"></a><span class="lineno">  228</span>&#160;    <span class="keywordflow">if</span> (size &gt;= 32)</div><div class="line"><a name="l00229"></a><span class="lineno">  229</span>&#160;    {</div><div class="line"><a name="l00230"></a><span class="lineno">  230</span>&#160;        mask =  0xFFFFFFFF;</div><div class="line"><a name="l00231"></a><span class="lineno">  231</span>&#160;    }</div><div class="line"><a name="l00232"></a><span class="lineno">  232</span>&#160;    <span class="keywordflow">else</span></div><div class="line"><a name="l00233"></a><span class="lineno">  233</span>&#160;    {</div><div class="line"><a name="l00234"></a><span class="lineno">  234</span>&#160;        mask = (1u &lt;&lt; size) - 1;</div><div class="line"><a name="l00235"></a><span class="lineno">  235</span>&#160;    }</div><div class="line"><a name="l00236"></a><span class="lineno">  236</span>&#160;    <span class="keywordflow">return</span> mask;</div><div class="line"><a name="l00237"></a><span class="lineno">  237</span>&#160;}</div><div class="line"><a name="l00238"></a><span class="lineno">  238</span>&#160;</div><div class="line"><a name="l00239"></a><span class="lineno"><a class="line" href="namespacearmnn.xhtml#a8775760e910e27c0fb0e1d7b1cf6d184">  239</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">bool</span> <a class="code" href="namespacearmnn.xhtml#a8775760e910e27c0fb0e1d7b1cf6d184">bit_extract</a>(</div><div class="line"><a name="l00240"></a><span 
class="lineno">  240</span>&#160;    <span class="keyword">const</span> uint8_t pos,</div><div class="line"><a name="l00241"></a><span class="lineno">  241</span>&#160;    <span class="keyword">const</span> uint32_t val)</div><div class="line"><a name="l00242"></a><span class="lineno">  242</span>&#160;{</div><div class="line"><a name="l00243"></a><span class="lineno">  243</span>&#160;    <span class="keywordflow">return</span> (((val &gt;&gt; pos) &amp; 1u) == 1u);</div><div class="line"><a name="l00244"></a><span class="lineno">  244</span>&#160;}</div><div class="line"><a name="l00245"></a><span class="lineno">  245</span>&#160;</div><div class="line"><a name="l00246"></a><span class="lineno"><a class="line" href="namespacearmnn.xhtml#aed2e033162f0f4e4242942c4e2ab2ed8">  246</a></span>&#160;<span class="keyword">inline</span> uint32_t <a class="code" href="namespacearmnn.xhtml#aed2e033162f0f4e4242942c4e2ab2ed8">bit_field</a>(</div><div class="line"><a name="l00247"></a><span class="lineno">  247</span>&#160;    <span class="keyword">const</span> uint8_t high_bit,</div><div class="line"><a name="l00248"></a><span class="lineno">  248</span>&#160;    <span class="keyword">const</span> uint8_t low_bit,</div><div class="line"><a name="l00249"></a><span class="lineno">  249</span>&#160;    <span class="keyword">const</span> uint32_t val)</div><div class="line"><a name="l00250"></a><span class="lineno">  250</span>&#160;{</div><div class="line"><a name="l00251"></a><span class="lineno">  251</span>&#160;    uint8_t size;</div><div class="line"><a name="l00252"></a><span class="lineno">  252</span>&#160;    <span class="keywordtype">int</span> src = high_bit + 1 - low_bit;</div><div class="line"><a name="l00253"></a><span class="lineno">  253</span>&#160;    memcpy(&amp;size, &amp;src, <span class="keyword">sizeof</span>(uint8_t));</div><div class="line"><a name="l00254"></a><span class="lineno">  254</span>&#160;</div><div class="line"><a name="l00255"></a><span 
class="lineno">  255</span>&#160;    <span class="comment">//return ((val &gt;&gt; low_bit) &amp; one_mask(high_bit + 1 - low_bit));</span></div><div class="line"><a name="l00256"></a><span class="lineno">  256</span>&#160;    <span class="keywordflow">return</span> ((val &gt;&gt; low_bit) &amp; <a class="code" href="namespacearmnn.xhtml#aa7dfb0fb6a458a92ec5bb985aea1daae">one_mask</a>(size));</div><div class="line"><a name="l00257"></a><span class="lineno">  257</span>&#160;}</div><div class="line"><a name="l00258"></a><span class="lineno">  258</span>&#160;</div><div class="line"><a name="l00259"></a><span class="lineno"><a class="line" href="namespacearmnn.xhtml#a1c8a12a9ede3260e20c65238f20a52a6">  259</a></span>&#160;<span class="keyword">inline</span> uint32_t <a class="code" href="namespacearmnn.xhtml#a1c8a12a9ede3260e20c65238f20a52a6">wtfp_round_mantissa</a>(</div><div class="line"><a name="l00260"></a><span class="lineno">  260</span>&#160;    <span class="keyword">const</span> uint32_t unshifted_mantissa,</div><div class="line"><a name="l00261"></a><span class="lineno">  261</span>&#160;    <span class="keyword">const</span> uint32_t <a class="code" href="structarmnn_1_1exp.xhtml">exp</a>)</div><div class="line"><a name="l00262"></a><span class="lineno">  262</span>&#160;{</div><div class="line"><a name="l00263"></a><span class="lineno">  263</span>&#160;    uint32_t rounded_mantissa = 0;</div><div class="line"><a name="l00264"></a><span class="lineno">  264</span>&#160;    <span class="comment">// No shifting means no rounding</span></div><div class="line"><a name="l00265"></a><span class="lineno">  265</span>&#160;    <span class="keywordflow">if</span> (exp == 0)</div><div class="line"><a name="l00266"></a><span class="lineno">  266</span>&#160;    {</div><div class="line"><a name="l00267"></a><span class="lineno">  267</span>&#160;        rounded_mantissa = unshifted_mantissa;</div><div class="line"><a name="l00268"></a><span class="lineno">  
268</span>&#160;    }</div><div class="line"><a name="l00269"></a><span class="lineno">  269</span>&#160;    <span class="keywordflow">else</span> <span class="keywordflow">if</span> ((unshifted_mantissa &gt;&gt; exp) == 0xff)</div><div class="line"><a name="l00270"></a><span class="lineno">  270</span>&#160;    {</div><div class="line"><a name="l00271"></a><span class="lineno">  271</span>&#160;        <span class="comment">// Rounding overflow case?</span></div><div class="line"><a name="l00272"></a><span class="lineno">  272</span>&#160;        rounded_mantissa = 0xFF;</div><div class="line"><a name="l00273"></a><span class="lineno">  273</span>&#160;    }</div><div class="line"><a name="l00274"></a><span class="lineno">  274</span>&#160;    <span class="keywordflow">else</span></div><div class="line"><a name="l00275"></a><span class="lineno">  275</span>&#160;    {</div><div class="line"><a name="l00276"></a><span class="lineno">  276</span>&#160;        <span class="comment">// Pull off the most significant bit that &#39;s being dropped off&#39;</span></div><div class="line"><a name="l00277"></a><span class="lineno">  277</span>&#160;        <span class="comment">// and use that to round to nearest.</span></div><div class="line"><a name="l00278"></a><span class="lineno">  278</span>&#160;        uint32_t round_bit = (unshifted_mantissa &gt;&gt; (exp - 1)) &amp; 1;</div><div class="line"><a name="l00279"></a><span class="lineno">  279</span>&#160;        rounded_mantissa = (unshifted_mantissa &gt;&gt; exp) + round_bit;</div><div class="line"><a name="l00280"></a><span class="lineno">  280</span>&#160;    }</div><div class="line"><a name="l00281"></a><span class="lineno">  281</span>&#160;</div><div class="line"><a name="l00282"></a><span class="lineno">  282</span>&#160;    <span class="keywordflow">return</span> rounded_mantissa;</div><div class="line"><a name="l00283"></a><span class="lineno">  283</span>&#160;}</div><div class="line"><a 
name="l00284"></a><span class="lineno">  284</span>&#160;</div><div class="line"><a name="l00285"></a><span class="lineno"><a class="line" href="namespacearmnn.xhtml#a8b11b1deeaebaeeb3fa73c25099a226f">  285</a></span>&#160;<span class="keyword">inline</span> <span class="keyword">struct </span><a class="code" href="structarmnn_1_1hw__float.xhtml">hw_float</a> <a class="code" href="namespacearmnn.xhtml#a8b11b1deeaebaeeb3fa73c25099a226f">convert_to_S12E8M</a>(</div><div class="line"><a name="l00286"></a><span class="lineno">  286</span>&#160;    const int32_t val)</div><div class="line"><a name="l00287"></a><span class="lineno">  287</span>&#160;{</div><div class="line"><a name="l00288"></a><span class="lineno">  288</span>&#160;    uint32_t unsigned_val;</div><div class="line"><a name="l00289"></a><span class="lineno">  289</span>&#160;    uint32_t unsigned_not_val;</div><div class="line"><a name="l00290"></a><span class="lineno">  290</span>&#160;    int32_t  not_val = ~val + 1;</div><div class="line"><a name="l00291"></a><span class="lineno">  291</span>&#160;</div><div class="line"><a name="l00292"></a><span class="lineno">  292</span>&#160;    memcpy(&amp;unsigned_val, &amp;val, <span class="keyword">sizeof</span>(uint32_t));</div><div class="line"><a name="l00293"></a><span class="lineno">  293</span>&#160;    memcpy(&amp;unsigned_not_val, &amp;not_val, <span class="keyword">sizeof</span>(uint32_t));</div><div class="line"><a name="l00294"></a><span class="lineno">  294</span>&#160;</div><div class="line"><a name="l00295"></a><span class="lineno">  295</span>&#160;    <span class="keyword">struct </span><a class="code" href="structarmnn_1_1hw__float.xhtml">hw_float</a> ret_fp;</div><div class="line"><a name="l00296"></a><span class="lineno">  296</span>&#160;    ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#ac791945aa5d385872f11a441e29fe786">sign</a> = <a class="code" 
href="namespacearmnn.xhtml#a8775760e910e27c0fb0e1d7b1cf6d184">bit_extract</a>(10, unsigned_val);</div><div class="line"><a name="l00297"></a><span class="lineno">  297</span>&#160;    <span class="comment">// Convert to unsigned mantissa</span></div><div class="line"><a name="l00298"></a><span class="lineno">  298</span>&#160;    <span class="keywordflow">if</span> (ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#ac791945aa5d385872f11a441e29fe786">sign</a>)</div><div class="line"><a name="l00299"></a><span class="lineno">  299</span>&#160;    {</div><div class="line"><a name="l00300"></a><span class="lineno">  300</span>&#160;        ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#a81f962a76560c6bef5d04b53b071e6ef">man</a> = <a class="code" href="namespacearmnn.xhtml#aed2e033162f0f4e4242942c4e2ab2ed8">bit_field</a>(9, 0, unsigned_not_val);</div><div class="line"><a name="l00301"></a><span class="lineno">  301</span>&#160;    }</div><div class="line"><a name="l00302"></a><span class="lineno">  302</span>&#160;    <span class="keywordflow">else</span></div><div class="line"><a name="l00303"></a><span class="lineno">  303</span>&#160;    {</div><div class="line"><a name="l00304"></a><span class="lineno">  304</span>&#160;        ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#a81f962a76560c6bef5d04b53b071e6ef">man</a> = <a class="code" href="namespacearmnn.xhtml#aed2e033162f0f4e4242942c4e2ab2ed8">bit_field</a>(9, 0, unsigned_val);</div><div class="line"><a name="l00305"></a><span class="lineno">  305</span>&#160;    }</div><div class="line"><a name="l00306"></a><span class="lineno">  306</span>&#160;    <span class="comment">// Use leading 1 to extract the exponent and mantissa</span></div><div class="line"><a name="l00307"></a><span class="lineno">  307</span>&#160;    ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#abd69797ed098e8001c460f9bd34ec7b6">exp</a> = 0;</div><div class="line"><a name="l00308"></a><span 
class="lineno">  308</span>&#160;    <span class="keywordflow">if</span> (<a class="code" href="namespacearmnn.xhtml#a8775760e910e27c0fb0e1d7b1cf6d184">bit_extract</a>(9, ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#a81f962a76560c6bef5d04b53b071e6ef">man</a>))</div><div class="line"><a name="l00309"></a><span class="lineno">  309</span>&#160;    {</div><div class="line"><a name="l00310"></a><span class="lineno">  310</span>&#160;        ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#abd69797ed098e8001c460f9bd34ec7b6">exp</a> = 2;</div><div class="line"><a name="l00311"></a><span class="lineno">  311</span>&#160;    }</div><div class="line"><a name="l00312"></a><span class="lineno">  312</span>&#160;    <span class="keywordflow">else</span> <span class="keywordflow">if</span> (<a class="code" href="namespacearmnn.xhtml#a8775760e910e27c0fb0e1d7b1cf6d184">bit_extract</a>(8, ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#a81f962a76560c6bef5d04b53b071e6ef">man</a>))</div><div class="line"><a name="l00313"></a><span class="lineno">  313</span>&#160;    {</div><div class="line"><a name="l00314"></a><span class="lineno">  314</span>&#160;        ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#abd69797ed098e8001c460f9bd34ec7b6">exp</a> = 1;</div><div class="line"><a name="l00315"></a><span class="lineno">  315</span>&#160;    }</div><div class="line"><a name="l00316"></a><span class="lineno">  316</span>&#160;    ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#a81f962a76560c6bef5d04b53b071e6ef">man</a> = <a class="code" href="namespacearmnn.xhtml#a1c8a12a9ede3260e20c65238f20a52a6">wtfp_round_mantissa</a>(<a class="code" href="namespacearmnn.xhtml#aed2e033162f0f4e4242942c4e2ab2ed8">bit_field</a>(10, 0, ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#a81f962a76560c6bef5d04b53b071e6ef">man</a>), ret_fp.<a class="code" 
href="structarmnn_1_1hw__float.xhtml#abd69797ed098e8001c460f9bd34ec7b6">exp</a>);</div><div class="line"><a name="l00317"></a><span class="lineno">  317</span>&#160;    <span class="keywordflow">return</span> ret_fp;</div><div class="line"><a name="l00318"></a><span class="lineno">  318</span>&#160;}</div><div class="line"><a name="l00319"></a><span class="lineno">  319</span>&#160;</div><div class="line"><a name="l00320"></a><span class="lineno"><a class="line" href="namespacearmnn.xhtml#a172004b54c3faafcdba1e63fa2f2eb0c">  320</a></span>&#160;<span class="keyword">inline</span> <span class="keyword">struct </span><a class="code" href="structarmnn_1_1hw__float.xhtml">hw_float</a> <a class="code" href="namespacearmnn.xhtml#a172004b54c3faafcdba1e63fa2f2eb0c">convert_to_S13E8M</a>(</div><div class="line"><a name="l00321"></a><span class="lineno">  321</span>&#160;    const int32_t val)</div><div class="line"><a name="l00322"></a><span class="lineno">  322</span>&#160;{</div><div class="line"><a name="l00323"></a><span class="lineno">  323</span>&#160;    uint32_t unsigned_val;</div><div class="line"><a name="l00324"></a><span class="lineno">  324</span>&#160;    uint32_t unsigned_not_val;</div><div class="line"><a name="l00325"></a><span class="lineno">  325</span>&#160;    int32_t  not_val = ~val + 1;</div><div class="line"><a name="l00326"></a><span class="lineno">  326</span>&#160;</div><div class="line"><a name="l00327"></a><span class="lineno">  327</span>&#160;    memcpy(&amp;unsigned_val, &amp;val, <span class="keyword">sizeof</span>(uint32_t));</div><div class="line"><a name="l00328"></a><span class="lineno">  328</span>&#160;    memcpy(&amp;unsigned_not_val, &amp;not_val, <span class="keyword">sizeof</span>(uint32_t));</div><div class="line"><a name="l00329"></a><span class="lineno">  329</span>&#160;</div><div class="line"><a name="l00330"></a><span class="lineno">  330</span>&#160;    <span class="keyword">struct </span><a class="code" 
href="structarmnn_1_1hw__float.xhtml">hw_float</a> ret_fp;</div><div class="line"><a name="l00331"></a><span class="lineno">  331</span>&#160;    ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#ac791945aa5d385872f11a441e29fe786">sign</a> = <a class="code" href="namespacearmnn.xhtml#a8775760e910e27c0fb0e1d7b1cf6d184">bit_extract</a>(12, unsigned_val);</div><div class="line"><a name="l00332"></a><span class="lineno">  332</span>&#160;    <span class="comment">// Convert to unsigned mantissa</span></div><div class="line"><a name="l00333"></a><span class="lineno">  333</span>&#160;    <span class="keywordflow">if</span> (ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#ac791945aa5d385872f11a441e29fe786">sign</a>)</div><div class="line"><a name="l00334"></a><span class="lineno">  334</span>&#160;    {</div><div class="line"><a name="l00335"></a><span class="lineno">  335</span>&#160;        ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#a81f962a76560c6bef5d04b53b071e6ef">man</a> = <a class="code" href="namespacearmnn.xhtml#aed2e033162f0f4e4242942c4e2ab2ed8">bit_field</a>(11, 0, unsigned_not_val);</div><div class="line"><a name="l00336"></a><span class="lineno">  336</span>&#160;    }</div><div class="line"><a name="l00337"></a><span class="lineno">  337</span>&#160;    <span class="keywordflow">else</span></div><div class="line"><a name="l00338"></a><span class="lineno">  338</span>&#160;    {</div><div class="line"><a name="l00339"></a><span class="lineno">  339</span>&#160;        ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#a81f962a76560c6bef5d04b53b071e6ef">man</a> = <a class="code" href="namespacearmnn.xhtml#aed2e033162f0f4e4242942c4e2ab2ed8">bit_field</a>(11, 0, unsigned_val);</div><div class="line"><a name="l00340"></a><span class="lineno">  340</span>&#160;    }</div><div class="line"><a name="l00341"></a><span class="lineno">  341</span>&#160;    <span class="comment">// Use leading 1 to extract the exponent 
and mantissa</span></div><div class="line"><a name="l00342"></a><span class="lineno">  342</span>&#160;    ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#abd69797ed098e8001c460f9bd34ec7b6">exp</a> = 0;</div><div class="line"><a name="l00343"></a><span class="lineno">  343</span>&#160;    <span class="keywordflow">if</span> (<a class="code" href="namespacearmnn.xhtml#a8775760e910e27c0fb0e1d7b1cf6d184">bit_extract</a>(11, ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#a81f962a76560c6bef5d04b53b071e6ef">man</a>))</div><div class="line"><a name="l00344"></a><span class="lineno">  344</span>&#160;    {</div><div class="line"><a name="l00345"></a><span class="lineno">  345</span>&#160;        ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#abd69797ed098e8001c460f9bd34ec7b6">exp</a> = 4;</div><div class="line"><a name="l00346"></a><span class="lineno">  346</span>&#160;    }</div><div class="line"><a name="l00347"></a><span class="lineno">  347</span>&#160;    <span class="keywordflow">else</span> <span class="keywordflow">if</span> (<a class="code" href="namespacearmnn.xhtml#a8775760e910e27c0fb0e1d7b1cf6d184">bit_extract</a>(10, ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#a81f962a76560c6bef5d04b53b071e6ef">man</a>))</div><div class="line"><a name="l00348"></a><span class="lineno">  348</span>&#160;    {</div><div class="line"><a name="l00349"></a><span class="lineno">  349</span>&#160;        ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#abd69797ed098e8001c460f9bd34ec7b6">exp</a> = 3;</div><div class="line"><a name="l00350"></a><span class="lineno">  350</span>&#160;    }</div><div class="line"><a name="l00351"></a><span class="lineno">  351</span>&#160;    <span class="keywordflow">else</span> <span class="keywordflow">if</span> (<a class="code" href="namespacearmnn.xhtml#a8775760e910e27c0fb0e1d7b1cf6d184">bit_extract</a>(9, ret_fp.<a class="code" 
href="structarmnn_1_1hw__float.xhtml#a81f962a76560c6bef5d04b53b071e6ef">man</a>))</div><div class="line"><a name="l00352"></a><span class="lineno">  352</span>&#160;    {</div><div class="line"><a name="l00353"></a><span class="lineno">  353</span>&#160;        ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#abd69797ed098e8001c460f9bd34ec7b6">exp</a> = 2;</div><div class="line"><a name="l00354"></a><span class="lineno">  354</span>&#160;    }</div><div class="line"><a name="l00355"></a><span class="lineno">  355</span>&#160;    <span class="keywordflow">else</span> <span class="keywordflow">if</span> (<a class="code" href="namespacearmnn.xhtml#a8775760e910e27c0fb0e1d7b1cf6d184">bit_extract</a>(8, ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#a81f962a76560c6bef5d04b53b071e6ef">man</a>))</div><div class="line"><a name="l00356"></a><span class="lineno">  356</span>&#160;    {</div><div class="line"><a name="l00357"></a><span class="lineno">  357</span>&#160;        ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#abd69797ed098e8001c460f9bd34ec7b6">exp</a> = 1;</div><div class="line"><a name="l00358"></a><span class="lineno">  358</span>&#160;    }</div><div class="line"><a name="l00359"></a><span class="lineno">  359</span>&#160;    ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#a81f962a76560c6bef5d04b53b071e6ef">man</a> = <a class="code" href="namespacearmnn.xhtml#a1c8a12a9ede3260e20c65238f20a52a6">wtfp_round_mantissa</a>(<a class="code" href="namespacearmnn.xhtml#aed2e033162f0f4e4242942c4e2ab2ed8">bit_field</a>(11, 0, ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#a81f962a76560c6bef5d04b53b071e6ef">man</a>), ret_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#abd69797ed098e8001c460f9bd34ec7b6">exp</a>);</div><div class="line"><a name="l00360"></a><span class="lineno">  360</span>&#160;    <span class="keywordflow">return</span> ret_fp;</div><div class="line"><a name="l00361"></a><span 
class="lineno">  361</span>&#160;}</div><div class="line"><a name="l00362"></a><span class="lineno">  362</span>&#160;</div><div class="line"><a name="l00363"></a><span class="lineno"><a class="line" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">  363</a></span>&#160;<span class="keyword">inline</span> int64_t <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a> (int64_t value, <span class="keywordtype">bool</span> round=<span class="keyword">false</span>) {</div><div class="line"><a name="l00364"></a><span class="lineno">  364</span>&#160;</div><div class="line"><a name="l00365"></a><span class="lineno">  365</span>&#160;  int64_t round_bit = round? value &amp; 0x1 : 0;</div><div class="line"><a name="l00366"></a><span class="lineno">  366</span>&#160;    int64_t out       = (value &gt;&gt; 1) + round_bit;</div><div class="line"><a name="l00367"></a><span class="lineno">  367</span>&#160;  <span class="keywordflow">return</span> out;</div><div class="line"><a name="l00368"></a><span class="lineno">  368</span>&#160;}</div><div class="line"><a name="l00369"></a><span class="lineno">  369</span>&#160;</div><div class="line"><a name="l00370"></a><span class="lineno">  370</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> ConvData, <span class="keyword">typename</span> InputType, <span class="keyword">typename</span> WeightType, <span class="keyword">typename</span> BiasType&gt;</div><div class="line"><a name="l00371"></a><span class="lineno">  371</span>&#160;<span class="keyword">static</span> <span class="keywordtype">void</span> EthosnRefWinogradConvImpl(ConvData data,</div><div class="line"><a name="l00372"></a><span class="lineno">  372</span>&#160;                     <span class="keyword">const</span> InputType* inputData,</div><div class="line"><a name="l00373"></a><span class="lineno">  373</span>&#160;                     <span class="keywordtype">float</span> 
inputScale,</div><div class="line"><a name="l00374"></a><span class="lineno">  374</span>&#160;                     int32_t inputOffset,</div><div class="line"><a name="l00375"></a><span class="lineno">  375</span>&#160;                     <span class="keyword">const</span> WeightType* filterData,</div><div class="line"><a name="l00376"></a><span class="lineno">  376</span>&#160;                     <span class="keywordtype">float</span> filterScale,</div><div class="line"><a name="l00377"></a><span class="lineno">  377</span>&#160;                     int32_t filterOffset,</div><div class="line"><a name="l00378"></a><span class="lineno">  378</span>&#160;                     <span class="keyword">const</span> BiasType* biasData,</div><div class="line"><a name="l00379"></a><span class="lineno">  379</span>&#160;                     <span class="keywordtype">float</span> outputScale,</div><div class="line"><a name="l00380"></a><span class="lineno">  380</span>&#160;                     int32_t outputOffset,</div><div class="line"><a name="l00381"></a><span class="lineno">  381</span>&#160;                     <span class="keyword">const</span> <a class="code" href="classarmnn_1_1_tensor_info.xhtml">TensorInfo</a>&amp; filterInfo)</div><div class="line"><a name="l00382"></a><span class="lineno">  382</span>&#160;{</div><div class="line"><a name="l00383"></a><span class="lineno">  383</span>&#160;    <span class="keywordflow">if</span> (data.m_Parameters.m_BiasEnabled &amp;&amp; !biasData)</div><div class="line"><a name="l00384"></a><span class="lineno">  384</span>&#160;    {</div><div class="line"><a name="l00385"></a><span class="lineno">  385</span>&#160;        <span class="keywordflow">throw</span> <a class="code" href="classarmnn_1_1_invalid_argument_exception.xhtml">InvalidArgumentException</a>(<span class="stringliteral">&quot;Bias is enabled but the bias data is invalid&quot;</span>);</div><div class="line"><a name="l00386"></a><span class="lineno">  
386</span>&#160;    }</div><div class="line"><a name="l00387"></a><span class="lineno">  387</span>&#160;</div><div class="line"><a name="l00388"></a><span class="lineno">  388</span>&#160;    <span class="keyword">const</span> <a class="code" href="classarmnn_1_1_tensor_info.xhtml">TensorInfo</a>&amp; inputInfo  = <a class="code" href="namespacearmnn_1_1ethosnref.xhtml#a0781fc227cbcb3e82fd137bc0f6638cd">armnn::ethosnref::GetTensorInfo</a>(data.m_Inputs[0]);</div><div class="line"><a name="l00389"></a><span class="lineno">  389</span>&#160;    <span class="keyword">const</span> <a class="code" href="classarmnn_1_1_tensor_info.xhtml">TensorInfo</a>&amp; outputInfo = <a class="code" href="namespacearmnn_1_1ethosnref.xhtml#a0781fc227cbcb3e82fd137bc0f6638cd">armnn::ethosnref::GetTensorInfo</a>(data.m_Outputs[0]);</div><div class="line"><a name="l00390"></a><span class="lineno">  390</span>&#160;</div><div class="line"><a name="l00391"></a><span class="lineno">  391</span>&#160;    <a class="code" href="classarmnn_1_1_tensor_buffer_array_view.xhtml">TensorBufferArrayView&lt;InputType&gt;</a> output(outputInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>(),</div><div class="line"><a name="l00392"></a><span class="lineno">  392</span>&#160;                                            armnn::ethosnref::GetOutputTensorData&lt;InputType&gt;(0, data),</div><div class="line"><a name="l00393"></a><span class="lineno">  393</span>&#160;                                            data.m_Parameters.m_DataLayout);</div><div class="line"><a name="l00394"></a><span class="lineno">  394</span>&#160;</div><div class="line"><a name="l00395"></a><span class="lineno">  395</span>&#160;    <span class="keyword">const</span> <a class="code" href="classarmnn_utils_1_1_data_layout_indexed.xhtml">armnnUtils::DataLayoutIndexed</a> dataLayoutIndexed(data.m_Parameters.m_DataLayout);</div><div class="line"><a name="l00396"></a><span 
class="lineno">  396</span>&#160;</div><div class="line"><a name="l00397"></a><span class="lineno">  397</span>&#160;    <span class="keyword">const</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> channelsIndex = dataLayoutIndexed.<a class="code" href="classarmnn_utils_1_1_data_layout_indexed.xhtml#a861b2621ee46e4b63379988b360b8cd9">GetChannelsIndex</a>();</div><div class="line"><a name="l00398"></a><span class="lineno">  398</span>&#160;    <span class="keyword">const</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> heightIndex   = dataLayoutIndexed.<a class="code" href="classarmnn_utils_1_1_data_layout_indexed.xhtml#a61c00316c443adc233c24e85c6c5b740">GetHeightIndex</a>();</div><div class="line"><a name="l00399"></a><span class="lineno">  399</span>&#160;    <span class="keyword">const</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> widthIndex    = dataLayoutIndexed.<a class="code" href="classarmnn_utils_1_1_data_layout_indexed.xhtml#a414e6f95548e6f7a01d5028b55ad3941">GetWidthIndex</a>();</div><div class="line"><a name="l00400"></a><span class="lineno">  400</span>&#160;</div><div class="line"><a name="l00401"></a><span class="lineno">  401</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> inputChannels   = filterInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[channelsIndex];</div><div class="line"><a name="l00402"></a><span class="lineno">  402</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> outputChannels  = filterInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[0];</div><div class="line"><a name="l00403"></a><span class="lineno">  403</span>&#160;</div><div class="line"><a name="l00404"></a><span class="lineno">  404</span>&#160;    <span 
class="keywordtype">unsigned</span> <span class="keywordtype">int</span> batchSize    = outputInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[0];</div><div class="line"><a name="l00405"></a><span class="lineno">  405</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> outputHeight = outputInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[heightIndex];</div><div class="line"><a name="l00406"></a><span class="lineno">  406</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> outputWidth  = outputInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[widthIndex];</div><div class="line"><a name="l00407"></a><span class="lineno">  407</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> inputHeight  = inputInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[heightIndex];</div><div class="line"><a name="l00408"></a><span class="lineno">  408</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> inputWidth   = inputInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[widthIndex];</div><div class="line"><a name="l00409"></a><span class="lineno">  409</span>&#160;</div><div class="line"><a name="l00410"></a><span class="lineno">  410</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> paddingTop  = data.m_Parameters.m_PadTop;</div><div class="line"><a name="l00411"></a><span class="lineno">  411</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> paddingLeft = data.m_Parameters.m_PadLeft;</div><div class="line"><a name="l00412"></a><span 
class="lineno">  412</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> xStride     = data.m_Parameters.m_StrideX;</div><div class="line"><a name="l00413"></a><span class="lineno">  413</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> yStride     = data.m_Parameters.m_StrideY;</div><div class="line"><a name="l00414"></a><span class="lineno">  414</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> xDilation   = data.m_Parameters.m_DilationX;</div><div class="line"><a name="l00415"></a><span class="lineno">  415</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> yDilation   = data.m_Parameters.m_DilationY;</div><div class="line"><a name="l00416"></a><span class="lineno">  416</span>&#160;</div><div class="line"><a name="l00417"></a><span class="lineno">  417</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> filterHeight = filterInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[heightIndex];</div><div class="line"><a name="l00418"></a><span class="lineno">  418</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> filterWidth  = filterInfo.<a class="code" href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">GetShape</a>()[widthIndex];</div><div class="line"><a name="l00419"></a><span class="lineno">  419</span>&#160;</div><div class="line"><a name="l00420"></a><span class="lineno">  420</span>&#160;    <span class="comment">// figure out size of Kernel and how many kernel patches</span></div><div class="line"><a name="l00421"></a><span class="lineno">  421</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> filterHeight_roundup = (filterHeight == 1)? 
1 : 3 * <a class="code" href="namespacearmnn_1_1ethosnref.xhtml#abeb64a06a58ebe59c4a832f29246b550">armnn::ethosnref::DivideRoundUp</a>(filterHeight, 3);</div><div class="line"><a name="l00422"></a><span class="lineno">  422</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> decomposedHeight     = (filterHeight == 1)? 1 : 3;</div><div class="line"><a name="l00423"></a><span class="lineno">  423</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> filterWidth_roundup  = (filterWidth == 1) ? 1 : 3 * <a class="code" href="namespacearmnn_1_1ethosnref.xhtml#abeb64a06a58ebe59c4a832f29246b550">armnn::ethosnref::DivideRoundUp</a>(filterWidth, 3);</div><div class="line"><a name="l00424"></a><span class="lineno">  424</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> decomposedWidth      = (filterWidth == 1) ? 1 : 3;</div><div class="line"><a name="l00425"></a><span class="lineno">  425</span>&#160;</div><div class="line"><a name="l00426"></a><span class="lineno">  426</span>&#160;    <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> h_dim, w_dim, w_stride, h_stride;</div><div class="line"><a name="l00427"></a><span class="lineno">  427</span>&#160;    <span class="keywordflow">if</span> (filterHeight == 1) {</div><div class="line"><a name="l00428"></a><span class="lineno">  428</span>&#160;        h_dim = 1;</div><div class="line"><a name="l00429"></a><span class="lineno">  429</span>&#160;        w_dim = 4;</div><div class="line"><a name="l00430"></a><span class="lineno">  430</span>&#160;        h_stride = 4;</div><div class="line"><a name="l00431"></a><span class="lineno">  431</span>&#160;        w_stride = 2;</div><div class="line"><a name="l00432"></a><span class="lineno">  432</span>&#160;    } <span class="keywordflow">else</span> <span class="keywordflow">if</span> (filterWidth == 1) {</div><div 
class="line"><a name="l00433"></a><span class="lineno">  433</span>&#160;        h_dim = 4;</div><div class="line"><a name="l00434"></a><span class="lineno">  434</span>&#160;        w_dim = 1;</div><div class="line"><a name="l00435"></a><span class="lineno">  435</span>&#160;        h_stride = 2;</div><div class="line"><a name="l00436"></a><span class="lineno">  436</span>&#160;        w_stride = 4;</div><div class="line"><a name="l00437"></a><span class="lineno">  437</span>&#160;    } <span class="keywordflow">else</span> {</div><div class="line"><a name="l00438"></a><span class="lineno">  438</span>&#160;        h_dim = 4;</div><div class="line"><a name="l00439"></a><span class="lineno">  439</span>&#160;        w_dim = 4;</div><div class="line"><a name="l00440"></a><span class="lineno">  440</span>&#160;        h_stride = 2;</div><div class="line"><a name="l00441"></a><span class="lineno">  441</span>&#160;        w_stride = 2;</div><div class="line"><a name="l00442"></a><span class="lineno">  442</span>&#160;    }</div><div class="line"><a name="l00443"></a><span class="lineno">  443</span>&#160;</div><div class="line"><a name="l00444"></a><span class="lineno">  444</span>&#160;    <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> batchIdx = 0; batchIdx &lt; batchSize; batchIdx++)</div><div class="line"><a name="l00445"></a><span class="lineno">  445</span>&#160;    {</div><div class="line"><a name="l00446"></a><span class="lineno">  446</span>&#160;        <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> cOutput = 0; cOutput &lt; outputChannels; cOutput++)</div><div class="line"><a name="l00447"></a><span class="lineno">  447</span>&#160;        {</div><div class="line"><a name="l00448"></a><span class="lineno">  448</span>&#160;            <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span 
class="keywordtype">int</span> yOutput = 0; yOutput &lt; outputHeight; yOutput=yOutput+h_stride)</div><div class="line"><a name="l00449"></a><span class="lineno">  449</span>&#160;            {</div><div class="line"><a name="l00450"></a><span class="lineno">  450</span>&#160;                <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> xOutput = 0; xOutput &lt; outputWidth; xOutput=xOutput+w_stride)</div><div class="line"><a name="l00451"></a><span class="lineno">  451</span>&#160;                {</div><div class="line"><a name="l00452"></a><span class="lineno">  452</span>&#160;                   <span class="comment">// This loop goes over each output element - per winograd output patches (h_stride x w_stride).</span></div><div class="line"><a name="l00453"></a><span class="lineno">  453</span>&#160;                   std::vector&lt;int64_t&gt; MAC(4*4, 0);</div><div class="line"><a name="l00454"></a><span class="lineno">  454</span>&#160;</div><div class="line"><a name="l00455"></a><span class="lineno">  455</span>&#160;                   <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> cInput = 0; cInput &lt; inputChannels; cInput++)</div><div class="line"><a name="l00456"></a><span class="lineno">  456</span>&#160;                   {</div><div class="line"><a name="l00457"></a><span class="lineno">  457</span>&#160;                      <span class="comment">// This loop goes over each filter element - per winograd filter patches (decomposedHeight x decomposedWidth).</span></div><div class="line"><a name="l00458"></a><span class="lineno">  458</span>&#160;                      <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> yFilter= 0; yFilter &lt; filterHeight_roundup; yFilter += decomposedHeight) {</div><div class="line"><a name="l00459"></a><span 
class="lineno">  459</span>&#160;                          <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> xFilter= 0; xFilter &lt; filterWidth_roundup; xFilter += decomposedWidth) {</div><div class="line"><a name="l00460"></a><span class="lineno">  460</span>&#160;                              <span class="comment">// Create transformFilter</span></div><div class="line"><a name="l00461"></a><span class="lineno">  461</span>&#160;                              std::vector&lt;int32_t&gt; filterPreData(decomposedHeight*decomposedWidth, 0);</div><div class="line"><a name="l00462"></a><span class="lineno">  462</span>&#160;</div><div class="line"><a name="l00463"></a><span class="lineno">  463</span>&#160;                              <span class="comment">// Get Filter indexes</span></div><div class="line"><a name="l00464"></a><span class="lineno">  464</span>&#160;                              <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> xIncrFilter= 0; xIncrFilter &lt; decomposedWidth; xIncrFilter++) {</div><div class="line"><a name="l00465"></a><span class="lineno">  465</span>&#160;                                <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> yIncrFilter= 0; yIncrFilter &lt; decomposedHeight; yIncrFilter++) {</div><div class="line"><a name="l00466"></a><span class="lineno">  466</span>&#160;</div><div class="line"><a name="l00467"></a><span class="lineno">  467</span>&#160;                                  <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> filterIndex;</div><div class="line"><a name="l00468"></a><span class="lineno">  468</span>&#160;                                  <span class="keywordtype">bool</span> validFilterIndex = ((yFilter + yIncrFilter) &lt; filterHeight) &amp;&amp;  ((xFilter + 
xIncrFilter) &lt; filterWidth);</div><div class="line"><a name="l00469"></a><span class="lineno">  469</span>&#160;</div><div class="line"><a name="l00470"></a><span class="lineno">  470</span>&#160;                                  <span class="keywordflow">if</span> (data.m_Parameters.m_DataLayout == <a class="code" href="namespacearmnn.xhtml#ad1d5cce2d9e9a5d61c243e5c989112e0ad066db54b89b0912e7e7c6da51e2da51">DataLayout::NHWC</a>) {</div><div class="line"><a name="l00471"></a><span class="lineno">  471</span>&#160;                                    filterIndex = cOutput * filterHeight * filterWidth * inputChannels +</div><div class="line"><a name="l00472"></a><span class="lineno">  472</span>&#160;                                                  (yFilter + yIncrFilter)* filterWidth * inputChannels +</div><div class="line"><a name="l00473"></a><span class="lineno">  473</span>&#160;                                                  (xFilter + xIncrFilter)* inputChannels +</div><div class="line"><a name="l00474"></a><span class="lineno">  474</span>&#160;                                                  cInput;</div><div class="line"><a name="l00475"></a><span class="lineno">  475</span>&#160;                                  } <span class="keywordflow">else</span> {</div><div class="line"><a name="l00476"></a><span class="lineno">  476</span>&#160;                                    filterIndex = cOutput * filterWidth * filterHeight * inputChannels +</div><div class="line"><a name="l00477"></a><span class="lineno">  477</span>&#160;                                                  cInput  * filterWidth * filterHeight +</div><div class="line"><a name="l00478"></a><span class="lineno">  478</span>&#160;                                                  (yFilter + yIncrFilter) * filterWidth +</div><div class="line"><a name="l00479"></a><span class="lineno">  479</span>&#160;                                                  xFilter + xIncrFilter;</div><div 
class="line"><a name="l00480"></a><span class="lineno">  480</span>&#160;                                  }</div><div class="line"><a name="l00481"></a><span class="lineno">  481</span>&#160;</div><div class="line"><a name="l00482"></a><span class="lineno">  482</span>&#160;                                  filterPreData[xIncrFilter + decomposedWidth*yIncrFilter] = validFilterIndex ? filterData[filterIndex] - <a class="code" href="namespacearmnn.xhtml#a375ca3cff9f1b005d1412dc5f3cf5b6e">numeric_cast</a>&lt;int32_t&gt;(filterOffset) : 0;</div><div class="line"><a name="l00483"></a><span class="lineno">  483</span>&#160;                                }</div><div class="line"><a name="l00484"></a><span class="lineno">  484</span>&#160;                              }</div><div class="line"><a name="l00485"></a><span class="lineno">  485</span>&#160;</div><div class="line"><a name="l00486"></a><span class="lineno">  486</span>&#160;                              <span class="comment">// transform Filters</span></div><div class="line"><a name="l00487"></a><span class="lineno">  487</span>&#160;                              std::vector&lt;int32_t&gt; transformedFilter(4*4, 0);</div><div class="line"><a name="l00488"></a><span class="lineno">  488</span>&#160;</div><div class="line"><a name="l00489"></a><span class="lineno">  489</span>&#160;                              <span class="keywordflow">if</span> (h_dim == 1) { <span class="comment">// Winograd 1x3</span></div><div class="line"><a name="l00490"></a><span class="lineno">  490</span>&#160;                                <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> h = 0; h &lt; 4; h++) {</div><div class="line"><a name="l00491"></a><span class="lineno">  491</span>&#160;                                  transformedFilter[h + 4*0] = 2*filterPreData[0];</div><div class="line"><a name="l00492"></a><span class="lineno">  492</span>&#160;             
                     transformedFilter[h + 4*1] = filterPreData[0] + filterPreData[1] + filterPreData[2];</div><div class="line"><a name="l00493"></a><span class="lineno">  493</span>&#160;                                  transformedFilter[h + 4*2] = filterPreData[0] - filterPreData[1] + filterPreData[2];</div><div class="line"><a name="l00494"></a><span class="lineno">  494</span>&#160;                                  transformedFilter[h + 4*3] = 2*filterPreData[2];</div><div class="line"><a name="l00495"></a><span class="lineno">  495</span>&#160;                                }</div><div class="line"><a name="l00496"></a><span class="lineno">  496</span>&#160;                              } <span class="keywordflow">else</span> <span class="keywordflow">if</span> (w_dim == 1) { <span class="comment">// Winograd 3x1</span></div><div class="line"><a name="l00497"></a><span class="lineno">  497</span>&#160;                                <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> w = 0; w &lt; 4; w++) {</div><div class="line"><a name="l00498"></a><span class="lineno">  498</span>&#160;                                  transformedFilter[w*4  + 0] = 2*filterPreData[0];</div><div class="line"><a name="l00499"></a><span class="lineno">  499</span>&#160;                                  transformedFilter[w*4  + 1] = filterPreData[0] + filterPreData[1] + filterPreData[2];</div><div class="line"><a name="l00500"></a><span class="lineno">  500</span>&#160;                                  transformedFilter[w*4  + 2] = filterPreData[0] - filterPreData[1] + filterPreData[2];</div><div class="line"><a name="l00501"></a><span class="lineno">  501</span>&#160;                                  transformedFilter[w*4  + 3] = 2*filterPreData[2];</div><div class="line"><a name="l00502"></a><span class="lineno">  502</span>&#160;                                }</div><div class="line"><a 
name="l00503"></a><span class="lineno">  503</span>&#160;                              } <span class="keywordflow">else</span> { <span class="comment">// Winograd 3x3</span></div><div class="line"><a name="l00504"></a><span class="lineno">  504</span>&#160;                                std::vector&lt;int32_t&gt; tmpFilter(h_dim*decomposedWidth, 0);</div><div class="line"><a name="l00505"></a><span class="lineno">  505</span>&#160;</div><div class="line"><a name="l00506"></a><span class="lineno">  506</span>&#160;                                <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> w_filter=0; w_filter &lt; decomposedWidth; w_filter++) {</div><div class="line"><a name="l00507"></a><span class="lineno">  507</span>&#160;                                  tmpFilter[w_filter + 0*decomposedWidth] = 2*filterPreData[w_filter];</div><div class="line"><a name="l00508"></a><span class="lineno">  508</span>&#160;                                  tmpFilter[w_filter + 1*decomposedWidth] = filterPreData[w_filter] + filterPreData[1*decomposedWidth+w_filter] + filterPreData[2*decomposedWidth+w_filter];</div><div class="line"><a name="l00509"></a><span class="lineno">  509</span>&#160;                                  tmpFilter[w_filter + 2*decomposedWidth] = filterPreData[w_filter] - filterPreData[1*decomposedWidth+w_filter] + filterPreData[2*decomposedWidth+w_filter];</div><div class="line"><a name="l00510"></a><span class="lineno">  510</span>&#160;                                  tmpFilter[w_filter + 3*decomposedWidth] = 2*filterPreData[2*decomposedWidth+w_filter];</div><div class="line"><a name="l00511"></a><span class="lineno">  511</span>&#160;                                }</div><div class="line"><a name="l00512"></a><span class="lineno">  512</span>&#160;</div><div class="line"><a name="l00513"></a><span class="lineno">  513</span>&#160;                                <span 
class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> h_filter=0; h_filter &lt; h_dim; h_filter++) {</div><div class="line"><a name="l00514"></a><span class="lineno">  514</span>&#160;                                  transformedFilter[h_filter*w_dim + 0] = 2*tmpFilter[h_filter*decomposedWidth];</div><div class="line"><a name="l00515"></a><span class="lineno">  515</span>&#160;                                  transformedFilter[h_filter*w_dim + 1] = tmpFilter[h_filter*decomposedWidth] + tmpFilter[1+h_filter*decomposedWidth] + tmpFilter[2+h_filter*decomposedWidth];</div><div class="line"><a name="l00516"></a><span class="lineno">  516</span>&#160;                                  transformedFilter[h_filter*w_dim + 2] = tmpFilter[h_filter*decomposedWidth] - tmpFilter[1+h_filter*decomposedWidth] + tmpFilter[2+h_filter*decomposedWidth];</div><div class="line"><a name="l00517"></a><span class="lineno">  517</span>&#160;                                  transformedFilter[h_filter*w_dim + 3] = 2*tmpFilter[2+h_filter*decomposedWidth];</div><div class="line"><a name="l00518"></a><span class="lineno">  518</span>&#160;                                }</div><div class="line"><a name="l00519"></a><span class="lineno">  519</span>&#160;                              }</div><div class="line"><a name="l00520"></a><span class="lineno">  520</span>&#160;</div><div class="line"><a name="l00521"></a><span class="lineno">  521</span>&#160;                              <span class="comment">// Create transformInput</span></div><div class="line"><a name="l00522"></a><span class="lineno">  522</span>&#160;                              std::vector&lt;int32_t&gt; Inputs(4*4, 0);</div><div class="line"><a name="l00523"></a><span class="lineno">  523</span>&#160;</div><div class="line"><a name="l00524"></a><span class="lineno">  524</span>&#160;                              <span class="comment">// Gathering 4x4 input 
patch</span></div><div class="line"><a name="l00525"></a><span class="lineno">  525</span>&#160;                              <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> yIncr= 0; yIncr &lt; 4; yIncr++)</div><div class="line"><a name="l00526"></a><span class="lineno">  526</span>&#160;                              {</div><div class="line"><a name="l00527"></a><span class="lineno">  527</span>&#160;                                <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> xIncr = 0; xIncr &lt; 4; xIncr++)</div><div class="line"><a name="l00528"></a><span class="lineno">  528</span>&#160;                                {</div><div class="line"><a name="l00529"></a><span class="lineno">  529</span>&#160;                                  <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> yInput = yOutput * yStride + (yFilter + yIncr)*yDilation;</div><div class="line"><a name="l00530"></a><span class="lineno">  530</span>&#160;                                  <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> xInput = xOutput * xStride + (xFilter + xIncr)*xDilation;</div><div class="line"><a name="l00531"></a><span class="lineno">  531</span>&#160;</div><div class="line"><a name="l00532"></a><span class="lineno">  532</span>&#160;                                  <span class="keywordtype">bool</span> validIndex = (xInput  &gt;= paddingLeft) &amp;&amp; (xInput &lt; inputWidth + paddingLeft) &amp;&amp;</div><div class="line"><a name="l00533"></a><span class="lineno">  533</span>&#160;                                                    (yInput &gt;= paddingTop) &amp;&amp; (yInput &lt; inputHeight + paddingTop);</div><div class="line"><a name="l00534"></a><span class="lineno">  534</span>&#160;</div><div class="line"><a name="l00535"></a><span class="lineno">  
535</span>&#160;                                  <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> inputIndex;</div><div class="line"><a name="l00536"></a><span class="lineno">  536</span>&#160;                                  <span class="keywordflow">if</span> (data.m_Parameters.m_DataLayout == <a class="code" href="namespacearmnn.xhtml#ad1d5cce2d9e9a5d61c243e5c989112e0ad066db54b89b0912e7e7c6da51e2da51">DataLayout::NHWC</a>) {</div><div class="line"><a name="l00537"></a><span class="lineno">  537</span>&#160;                                    inputIndex =  batchIdx * inputHeight * inputWidth  * inputChannels +</div><div class="line"><a name="l00538"></a><span class="lineno">  538</span>&#160;                                                  (yInput - paddingTop) * inputWidth * inputChannels +</div><div class="line"><a name="l00539"></a><span class="lineno">  539</span>&#160;                                                  (xInput - paddingLeft) * inputChannels +</div><div class="line"><a name="l00540"></a><span class="lineno">  540</span>&#160;                                                  cInput;</div><div class="line"><a name="l00541"></a><span class="lineno">  541</span>&#160;</div><div class="line"><a name="l00542"></a><span class="lineno">  542</span>&#160;                                  } <span class="keywordflow">else</span> {</div><div class="line"><a name="l00543"></a><span class="lineno">  543</span>&#160;                                      inputIndex =  batchIdx * inputWidth * inputHeight * inputChannels +</div><div class="line"><a name="l00544"></a><span class="lineno">  544</span>&#160;                                                    inputWidth * inputHeight * cInput +</div><div class="line"><a name="l00545"></a><span class="lineno">  545</span>&#160;                                                    inputWidth * (yInput - paddingTop) +</div><div class="line"><a name="l00546"></a><span class="lineno">  
546</span>&#160;                                                    xInput - paddingLeft;</div><div class="line"><a name="l00547"></a><span class="lineno">  547</span>&#160;                                  }</div><div class="line"><a name="l00548"></a><span class="lineno">  548</span>&#160;                                  <span class="keywordflow">if</span> (h_dim ==1) Inputs[yIncr + 4*xIncr] = validIndex? inputData[inputIndex] - <a class="code" href="namespacearmnn.xhtml#a375ca3cff9f1b005d1412dc5f3cf5b6e">numeric_cast</a>&lt;int32_t&gt;(inputOffset) : 0;</div><div class="line"><a name="l00549"></a><span class="lineno">  549</span>&#160;                                  <span class="keywordflow">else</span> Inputs[xIncr + 4*yIncr] = validIndex? inputData[inputIndex] - <a class="code" href="namespacearmnn.xhtml#a375ca3cff9f1b005d1412dc5f3cf5b6e">numeric_cast</a>&lt;int32_t&gt;(inputOffset) : 0;</div><div class="line"><a name="l00550"></a><span class="lineno">  550</span>&#160;                                }</div><div class="line"><a name="l00551"></a><span class="lineno">  551</span>&#160;                              }</div><div class="line"><a name="l00552"></a><span class="lineno">  552</span>&#160;</div><div class="line"><a name="l00553"></a><span class="lineno">  553</span>&#160;                              std::vector&lt;int32_t&gt; transformedInput(4*4, 0);</div><div class="line"><a name="l00554"></a><span class="lineno">  554</span>&#160;                              std::vector&lt;int32_t&gt; tmpInput(4*4, 0);</div><div class="line"><a name="l00555"></a><span class="lineno">  555</span>&#160;</div><div class="line"><a name="l00556"></a><span class="lineno">  556</span>&#160;                              <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> in_w=0; in_w &lt; 4; in_w++) {</div><div class="line"><a name="l00557"></a><span class="lineno">  557</span>&#160;                       
           tmpInput[in_w + 0*4] = (w_dim == 4)?  Inputs[in_w + 4*0] - Inputs[in_w + 4*2] : Inputs[in_w*4 + 0];</div><div class="line"><a name="l00558"></a><span class="lineno">  558</span>&#160;                                  tmpInput[in_w + 1*4] = (w_dim == 4)?  Inputs[in_w + 4*1] + Inputs[in_w + 4*2] : Inputs[in_w*4 + 1];</div><div class="line"><a name="l00559"></a><span class="lineno">  559</span>&#160;                                  tmpInput[in_w + 2*4] = (w_dim == 4)? -Inputs[in_w + 4*1] + Inputs[in_w + 4*2] : Inputs[in_w*4 + 2];</div><div class="line"><a name="l00560"></a><span class="lineno">  560</span>&#160;                                  tmpInput[in_w + 3*4] = (w_dim == 4)?  Inputs[in_w + 4*1] - Inputs[in_w + 4*3] : Inputs[in_w*4 + 3];</div><div class="line"><a name="l00561"></a><span class="lineno">  561</span>&#160;                              }</div><div class="line"><a name="l00562"></a><span class="lineno">  562</span>&#160;</div><div class="line"><a name="l00563"></a><span class="lineno">  563</span>&#160;                              <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> in_h=0; in_h &lt; 4; in_h++) {</div><div class="line"><a name="l00564"></a><span class="lineno">  564</span>&#160;                                  transformedInput[in_h*4 + 0] = (h_dim == 4)?  tmpInput[0 + 4*in_h] - tmpInput[2 + 4*in_h] : tmpInput[in_h*4 + 0];</div><div class="line"><a name="l00565"></a><span class="lineno">  565</span>&#160;                                  transformedInput[in_h*4 + 1] = (h_dim == 4)?  tmpInput[1 + 4*in_h] + tmpInput[2 + 4*in_h] : tmpInput[in_h*4 + 1];</div><div class="line"><a name="l00566"></a><span class="lineno">  566</span>&#160;                                  transformedInput[in_h*4 + 2] = (h_dim == 4)? 
-tmpInput[1 + 4*in_h] + tmpInput[2 + 4*in_h] : tmpInput[in_h*4 + 2];</div><div class="line"><a name="l00567"></a><span class="lineno">  567</span>&#160;                                  transformedInput[in_h*4 + 3] = (h_dim == 4)?  tmpInput[1 + 4*in_h] - tmpInput[3 + 4*in_h] : tmpInput[in_h*4 + 3];</div><div class="line"><a name="l00568"></a><span class="lineno">  568</span>&#160;                              }</div><div class="line"><a name="l00569"></a><span class="lineno">  569</span>&#160;</div><div class="line"><a name="l00570"></a><span class="lineno">  570</span>&#160;                              <span class="comment">// Convert in WTFP format</span></div><div class="line"><a name="l00571"></a><span class="lineno">  571</span>&#160;                              <span class="comment">// and MAC units WTFP domain</span></div><div class="line"><a name="l00572"></a><span class="lineno">  572</span>&#160;</div><div class="line"><a name="l00573"></a><span class="lineno">  573</span>&#160;                              <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> y= 0; y &lt; 4 ; y++) {</div><div class="line"><a name="l00574"></a><span class="lineno">  574</span>&#160;                                  <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> x = 0; x &lt; 4; x++) {</div><div class="line"><a name="l00575"></a><span class="lineno">  575</span>&#160;                                      <span class="keyword">const</span> <span class="keyword">struct </span><a class="code" href="structarmnn_1_1hw__float.xhtml">hw_float</a> weight_fp = <a class="code" href="namespacearmnn.xhtml#a172004b54c3faafcdba1e63fa2f2eb0c">convert_to_S13E8M</a>(numeric_cast&lt;int32_t&gt;(transformedFilter[x+4*y]));</div><div class="line"><a name="l00576"></a><span class="lineno">  576</span>&#160;                                      <span 
class="keyword">const</span> <span class="keyword">struct </span><a class="code" href="structarmnn_1_1hw__float.xhtml">hw_float</a> ifm_fp = <a class="code" href="namespacearmnn.xhtml#a8b11b1deeaebaeeb3fa73c25099a226f">convert_to_S12E8M</a>(numeric_cast&lt;int32_t&gt;(transformedInput[x+4*y]));</div><div class="line"><a name="l00577"></a><span class="lineno">  577</span>&#160;                                      <span class="keyword">const</span> <span class="keywordtype">bool</span> sign = (weight_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#ac791945aa5d385872f11a441e29fe786">sign</a> != ifm_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#ac791945aa5d385872f11a441e29fe786">sign</a>);</div><div class="line"><a name="l00578"></a><span class="lineno">  578</span>&#160;                                      uint64_t unsigned_ofm_value = <a class="code" href="namespacearmnn.xhtml#a375ca3cff9f1b005d1412dc5f3cf5b6e">numeric_cast</a>&lt;uint64_t&gt;(ifm_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#a81f962a76560c6bef5d04b53b071e6ef">man</a>) * numeric_cast&lt;uint64_t&gt;(weight_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#a81f962a76560c6bef5d04b53b071e6ef">man</a>);</div><div class="line"><a name="l00579"></a><span class="lineno">  579</span>&#160;                                      uint64_t total_shift = <a class="code" href="namespacearmnn.xhtml#a375ca3cff9f1b005d1412dc5f3cf5b6e">numeric_cast</a>&lt;uint64_t&gt;(weight_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#abd69797ed098e8001c460f9bd34ec7b6">exp</a>) + numeric_cast&lt;uint64_t&gt;(ifm_fp.<a class="code" href="structarmnn_1_1hw__float.xhtml#abd69797ed098e8001c460f9bd34ec7b6">exp</a>);</div><div class="line"><a name="l00580"></a><span class="lineno">  580</span>&#160;                                      unsigned_ofm_value &lt;&lt;= total_shift;</div><div class="line"><a name="l00581"></a><span class="lineno">  581</span>&#160;                                     
 <span class="keywordflow">if</span> (sign)</div><div class="line"><a name="l00582"></a><span class="lineno">  582</span>&#160;                                      {</div><div class="line"><a name="l00583"></a><span class="lineno">  583</span>&#160;                                        unsigned_ofm_value = ~unsigned_ofm_value;</div><div class="line"><a name="l00584"></a><span class="lineno">  584</span>&#160;                                        unsigned_ofm_value += 1;</div><div class="line"><a name="l00585"></a><span class="lineno">  585</span>&#160;                                      }</div><div class="line"><a name="l00586"></a><span class="lineno">  586</span>&#160;                                      int64_t ofm_value = 0;</div><div class="line"><a name="l00587"></a><span class="lineno">  587</span>&#160;                                      memcpy(&amp;ofm_value, &amp;unsigned_ofm_value, <span class="keyword">sizeof</span>(int64_t));</div><div class="line"><a name="l00588"></a><span class="lineno">  588</span>&#160;                                      MAC[x + 4*y] += ofm_value;</div><div class="line"><a name="l00589"></a><span class="lineno">  589</span>&#160;                                }</div><div class="line"><a name="l00590"></a><span class="lineno">  590</span>&#160;                              }</div><div class="line"><a name="l00591"></a><span class="lineno">  591</span>&#160;                           }</div><div class="line"><a name="l00592"></a><span class="lineno">  592</span>&#160;                        }</div><div class="line"><a name="l00593"></a><span class="lineno">  593</span>&#160;                     }</div><div class="line"><a name="l00594"></a><span class="lineno">  594</span>&#160;</div><div class="line"><a name="l00595"></a><span class="lineno">  595</span>&#160;                     <span class="comment">// Create transformedFinal</span></div><div class="line"><a name="l00596"></a><span class="lineno">  596</span>&#160;   
                  std::vector&lt;int64_t&gt; transformedFinal(h_stride*w_stride, 0);</div><div class="line"><a name="l00597"></a><span class="lineno">  597</span>&#160;</div><div class="line"><a name="l00598"></a><span class="lineno">  598</span>&#160;                     <span class="keywordflow">if</span> (h_dim == 1) { <span class="comment">// Winograd 1x3</span></div><div class="line"><a name="l00599"></a><span class="lineno">  599</span>&#160;                         transformedFinal[0] += <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[0] + MAC[4] + MAC[8]);</div><div class="line"><a name="l00600"></a><span class="lineno">  600</span>&#160;                         transformedFinal[1] += <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[4] - MAC[8] - MAC[12]);</div><div class="line"><a name="l00601"></a><span class="lineno">  601</span>&#160;                         transformedFinal[2] += <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[1] + MAC[5] + MAC[9]);</div><div class="line"><a name="l00602"></a><span class="lineno">  602</span>&#160;                         transformedFinal[3] += <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[5] - MAC[9] - MAC[13]);</div><div class="line"><a name="l00603"></a><span class="lineno">  603</span>&#160;                         transformedFinal[4] += <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[2] + MAC[6] + MAC[10]);</div><div class="line"><a name="l00604"></a><span class="lineno">  604</span>&#160;                         transformedFinal[5] += <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[6] - MAC[10] - MAC[14]);</div><div class="line"><a name="l00605"></a><span class="lineno">  605</span>&#160;                         
transformedFinal[6] += <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[3] + MAC[7] + MAC[11]);</div><div class="line"><a name="l00606"></a><span class="lineno">  606</span>&#160;                         transformedFinal[7] += <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[7] - MAC[11] - MAC[15]);</div><div class="line"><a name="l00607"></a><span class="lineno">  607</span>&#160;                     } <span class="keywordflow">else</span> <span class="keywordflow">if</span> (w_dim == 1) { <span class="comment">// Winograd 3x1</span></div><div class="line"><a name="l00608"></a><span class="lineno">  608</span>&#160;                         transformedFinal[0] += <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[0] + MAC[1] + MAC[2]);</div><div class="line"><a name="l00609"></a><span class="lineno">  609</span>&#160;                         transformedFinal[1] += <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[4] + MAC[5] + MAC[6]);</div><div class="line"><a name="l00610"></a><span class="lineno">  610</span>&#160;                         transformedFinal[2] += <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[8] + MAC[9] + MAC[10]);</div><div class="line"><a name="l00611"></a><span class="lineno">  611</span>&#160;                         transformedFinal[3] += <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[12] + MAC[13] + MAC[14]);</div><div class="line"><a name="l00612"></a><span class="lineno">  612</span>&#160;                         transformedFinal[4] += <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[1] - MAC[2] - MAC[3]);</div><div class="line"><a name="l00613"></a><span class="lineno">  613</span>&#160;                     
    transformedFinal[5] += <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[5] - MAC[6] - MAC[7]);</div><div class="line"><a name="l00614"></a><span class="lineno">  614</span>&#160;                         transformedFinal[6] += <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[9] - MAC[10] - MAC[11]);</div><div class="line"><a name="l00615"></a><span class="lineno">  615</span>&#160;                         transformedFinal[7] += <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[13] - MAC[14] - MAC[15]);</div><div class="line"><a name="l00616"></a><span class="lineno">  616</span>&#160;                     } <span class="keywordflow">else</span> { <span class="comment">// Winograd 3x3</span></div><div class="line"><a name="l00617"></a><span class="lineno">  617</span>&#160;                         transformedFinal[0] += <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(   <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[0] + MAC[4]  + MAC[8])</div><div class="line"><a name="l00618"></a><span class="lineno">  618</span>&#160;                                                            + <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[1] + MAC[5]  + MAC[9])</div><div class="line"><a name="l00619"></a><span class="lineno">  619</span>&#160;                                                            + <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[2] + MAC[6]  + MAC[10])  );</div><div class="line"><a name="l00620"></a><span class="lineno">  620</span>&#160;                         transformedFinal[1] += <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(   <a class="code" 
href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[1] + MAC[5]  + MAC[9])</div><div class="line"><a name="l00621"></a><span class="lineno">  621</span>&#160;                                                            - <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[2] + MAC[6]  + MAC[10])</div><div class="line"><a name="l00622"></a><span class="lineno">  622</span>&#160;                                                            - <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[3] + MAC[7]  + MAC[11])  );</div><div class="line"><a name="l00623"></a><span class="lineno">  623</span>&#160;                         transformedFinal[2] += <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(   <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[4] - MAC[8]  - MAC[12])</div><div class="line"><a name="l00624"></a><span class="lineno">  624</span>&#160;                                                            + <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[5] - MAC[9]  - MAC[13])</div><div class="line"><a name="l00625"></a><span class="lineno">  625</span>&#160;                                                            + <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[6] - MAC[10] - MAC[14])  );</div><div class="line"><a name="l00626"></a><span class="lineno">  626</span>&#160;                         transformedFinal[3] += <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(   <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[5] - MAC[9]  - MAC[13])</div><div class="line"><a name="l00627"></a><span class="lineno">  627</span>&#160;                                             
               - <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[6] - MAC[10] - MAC[14])</div><div class="line"><a name="l00628"></a><span class="lineno">  628</span>&#160;                                                            - <a class="code" href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">ShiftRound</a>(MAC[7] - MAC[11] - MAC[15])  );</div><div class="line"><a name="l00629"></a><span class="lineno">  629</span>&#160;                     }</div><div class="line"><a name="l00630"></a><span class="lineno">  630</span>&#160;</div><div class="line"><a name="l00631"></a><span class="lineno">  631</span>&#160;                    <span class="comment">// Create output patches</span></div><div class="line"><a name="l00632"></a><span class="lineno">  632</span>&#160;                    <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> yOutIncr= 0; yOutIncr &lt; h_stride; yOutIncr++)</div><div class="line"><a name="l00633"></a><span class="lineno">  633</span>&#160;                    {</div><div class="line"><a name="l00634"></a><span class="lineno">  634</span>&#160;                      <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> xOutIncr = 0; xOutIncr &lt; w_stride; xOutIncr++)</div><div class="line"><a name="l00635"></a><span class="lineno">  635</span>&#160;                      {</div><div class="line"><a name="l00636"></a><span class="lineno">  636</span>&#160;                        <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> xOut = xOutput + xOutIncr;</div><div class="line"><a name="l00637"></a><span class="lineno">  637</span>&#160;                        <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> yOut = yOutput + yOutIncr;</div><div class="line"><a name="l00638"></a><span class="lineno"> 
 638</span>&#160;</div><div class="line"><a name="l00639"></a><span class="lineno">  639</span>&#160;                        <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> sumIndex = xOutIncr + w_stride*yOutIncr;</div><div class="line"><a name="l00640"></a><span class="lineno">  640</span>&#160;                        int64_t value = std::min&lt;int64_t&gt;(std::max&lt;int64_t&gt;(transformedFinal[sumIndex], std::numeric_limits&lt;int32_t&gt;::min()),</div><div class="line"><a name="l00641"></a><span class="lineno">  641</span>&#160;                                          std::numeric_limits&lt;int32_t&gt;::max());</div><div class="line"><a name="l00642"></a><span class="lineno">  642</span>&#160;</div><div class="line"><a name="l00643"></a><span class="lineno">  643</span>&#160;                        <span class="keywordflow">if</span> (data.m_Parameters.m_BiasEnabled)</div><div class="line"><a name="l00644"></a><span class="lineno">  644</span>&#160;                        {</div><div class="line"><a name="l00645"></a><span class="lineno">  645</span>&#160;                          value += biasData[cOutput];</div><div class="line"><a name="l00646"></a><span class="lineno">  646</span>&#160;                        }</div><div class="line"><a name="l00647"></a><span class="lineno">  647</span>&#160;                        <span class="keywordflow">if</span> (outputScale != 0.0f)</div><div class="line"><a name="l00648"></a><span class="lineno">  648</span>&#160;                        {</div><div class="line"><a name="l00649"></a><span class="lineno">  649</span>&#160;                          <span class="keywordtype">float</span> multiplier = (inputScale*filterScale) / outputScale;</div><div class="line"><a name="l00650"></a><span class="lineno">  650</span>&#160;</div><div class="line"><a name="l00651"></a><span class="lineno">  651</span>&#160;                          value = <a class="code" 
href="namespacearmnn.xhtml#a375ca3cff9f1b005d1412dc5f3cf5b6e">numeric_cast</a>&lt;int64_t&gt;(</div><div class="line"><a name="l00652"></a><span class="lineno">  652</span>&#160;                                    <a class="code" href="structarmnn_1_1_ethosn_ref_quantized_multiplier_smaller_than_one.xhtml#abcc6c336d5ed521b1d7c0e7b86a28176">EthosnRefQuantizedMultiplierSmallerThanOne</a>(multiplier) * <a class="code" href="namespacearmnn.xhtml#a375ca3cff9f1b005d1412dc5f3cf5b6e">numeric_cast</a>&lt;int64_t&gt;(value))</div><div class="line"><a name="l00653"></a><span class="lineno">  653</span>&#160;                                  + <a class="code" href="namespacearmnn.xhtml#a375ca3cff9f1b005d1412dc5f3cf5b6e">numeric_cast</a>&lt;int64_t&gt;(outputOffset);</div><div class="line"><a name="l00654"></a><span class="lineno">  654</span>&#160;                          value = std::min&lt;int64_t&gt;(std::max&lt;int64_t&gt;(value, std::numeric_limits&lt;InputType&gt;::min()),</div><div class="line"><a name="l00655"></a><span class="lineno">  655</span>&#160;                                    std::numeric_limits&lt;InputType&gt;::max());</div><div class="line"><a name="l00656"></a><span class="lineno">  656</span>&#160;                        }</div><div class="line"><a name="l00657"></a><span class="lineno">  657</span>&#160;</div><div class="line"><a name="l00658"></a><span class="lineno">  658</span>&#160;                        <span class="keywordflow">if</span> (yOut &lt; outputHeight &amp;&amp; xOut &lt; outputWidth) output.<a class="code" href="classarmnn_1_1_tensor_buffer_array_view.xhtml#ab3a2cf851173535ea07b9d87eaf3ca01">Get</a>(batchIdx, cOutput, yOut, xOut) = <a class="code" href="namespacearmnn.xhtml#a375ca3cff9f1b005d1412dc5f3cf5b6e">numeric_cast</a>&lt;InputType&gt;(value);</div><div class="line"><a name="l00659"></a><span class="lineno">  659</span>&#160;                     }</div><div class="line"><a name="l00660"></a><span class="lineno">  
660</span>&#160;                  }</div><div class="line"><a name="l00661"></a><span class="lineno">  661</span>&#160;               }</div><div class="line"><a name="l00662"></a><span class="lineno">  662</span>&#160;            }</div><div class="line"><a name="l00663"></a><span class="lineno">  663</span>&#160;         }</div><div class="line"><a name="l00664"></a><span class="lineno">  664</span>&#160;      }</div><div class="line"><a name="l00665"></a><span class="lineno">  665</span>&#160;   }</div><div class="line"><a name="l00666"></a><span class="lineno">  666</span>&#160;} <span class="comment">//namespace armnn</span></div><div class="ttc" id="structarmnn_1_1_ethosn_ref_quantized_multiplier_smaller_than_one_xhtml_ac04834893e568dfea4642ecb575faed8"><div class="ttname"><a href="structarmnn_1_1_ethosn_ref_quantized_multiplier_smaller_than_one.xhtml#ac04834893e568dfea4642ecb575faed8">armnn::EthosnRefQuantizedMultiplierSmallerThanOne::operator*</a></div><div class="ttdeci">int32_t operator*(int32_t rhs) const</div><div class="ttdoc">The implementation of this function is adapted from Android NN&#39;s MultiplyByEthosnRefQuantizedMultipli...</div><div class="ttdef"><b>Definition:</b> <a href="_ethosn_ref_conv_impl_8cpp_source.xhtml#l00048">EthosnRefConvImpl.cpp:48</a></div></div>
<div class="ttc" id="namespacearmnn_xhtml_a8b11b1deeaebaeeb3fa73c25099a226f"><div class="ttname"><a href="namespacearmnn.xhtml#a8b11b1deeaebaeeb3fa73c25099a226f">armnn::convert_to_S12E8M</a></div><div class="ttdeci">struct hw_float convert_to_S12E8M(const int32_t val)</div><div class="ttdef"><b>Definition:</b> <a href="_ethosn_ref_conv_impl_8hpp_source.xhtml#l00285">EthosnRefConvImpl.hpp:285</a></div></div>
<div class="ttc" id="_data_layout_indexed_8hpp_xhtml"><div class="ttname"><a href="_data_layout_indexed_8hpp.xhtml">DataLayoutIndexed.hpp</a></div></div>
<div class="ttc" id="namespacearmnn_xhtml_aed2e033162f0f4e4242942c4e2ab2ed8"><div class="ttname"><a href="namespacearmnn.xhtml#aed2e033162f0f4e4242942c4e2ab2ed8">armnn::bit_field</a></div><div class="ttdeci">uint32_t bit_field(const uint8_t high_bit, const uint8_t low_bit, const uint32_t val)</div><div class="ttdef"><b>Definition:</b> <a href="_ethosn_ref_conv_impl_8hpp_source.xhtml#l00246">EthosnRefConvImpl.hpp:246</a></div></div>
<div class="ttc" id="classarmnn_utils_1_1_data_layout_indexed_xhtml_a414e6f95548e6f7a01d5028b55ad3941"><div class="ttname"><a href="classarmnn_utils_1_1_data_layout_indexed.xhtml#a414e6f95548e6f7a01d5028b55ad3941">armnnUtils::DataLayoutIndexed::GetWidthIndex</a></div><div class="ttdeci">unsigned int GetWidthIndex() const</div><div class="ttdef"><b>Definition:</b> <a href="_data_layout_indexed_8hpp_source.xhtml#l00025">DataLayoutIndexed.hpp:25</a></div></div>
<div class="ttc" id="namespacearmnn_xhtml_aa7dfb0fb6a458a92ec5bb985aea1daae"><div class="ttname"><a href="namespacearmnn.xhtml#aa7dfb0fb6a458a92ec5bb985aea1daae">armnn::one_mask</a></div><div class="ttdeci">uint32_t one_mask(const uint8_t size)</div><div class="ttdef"><b>Definition:</b> <a href="_ethosn_ref_conv_impl_8hpp_source.xhtml#l00225">EthosnRefConvImpl.hpp:225</a></div></div>
<div class="ttc" id="_tensor_8hpp_xhtml"><div class="ttname"><a href="_tensor_8hpp.xhtml">Tensor.hpp</a></div></div>
<div class="ttc" id="classarmnn_1_1_tensor_info_xhtml_a8b5d0f8a24e9d9238f412260a552acf8"><div class="ttname"><a href="classarmnn_1_1_tensor_info.xhtml#a8b5d0f8a24e9d9238f412260a552acf8">armnn::TensorInfo::GetShape</a></div><div class="ttdeci">const TensorShape &amp; GetShape() const</div><div class="ttdef"><b>Definition:</b> <a href="_tensor_8hpp_source.xhtml#l00191">Tensor.hpp:191</a></div></div>
<div class="ttc" id="classarmnn_1_1_tensor_info_xhtml"><div class="ttname"><a href="classarmnn_1_1_tensor_info.xhtml">armnn::TensorInfo</a></div><div class="ttdef"><b>Definition:</b> <a href="_tensor_8hpp_source.xhtml#l00152">Tensor.hpp:152</a></div></div>
<div class="ttc" id="_ethosn_ref_workload_utils_8hpp_xhtml"><div class="ttname"><a href="_ethosn_ref_workload_utils_8hpp.xhtml">EthosnRefWorkloadUtils.hpp</a></div></div>
<div class="ttc" id="namespacearmnn_xhtml_a1c8a12a9ede3260e20c65238f20a52a6"><div class="ttname"><a href="namespacearmnn.xhtml#a1c8a12a9ede3260e20c65238f20a52a6">armnn::wtfp_round_mantissa</a></div><div class="ttdeci">uint32_t wtfp_round_mantissa(const uint32_t unshifted_mantissa, const uint32_t exp)</div><div class="ttdef"><b>Definition:</b> <a href="_ethosn_ref_conv_impl_8hpp_source.xhtml#l00259">EthosnRefConvImpl.hpp:259</a></div></div>
<div class="ttc" id="structarmnn_1_1_ethosn_ref_quantized_multiplier_smaller_than_one_xhtml"><div class="ttname"><a href="structarmnn_1_1_ethosn_ref_quantized_multiplier_smaller_than_one.xhtml">armnn::EthosnRefQuantizedMultiplierSmallerThanOne</a></div><div class="ttdoc">Performs multiplication of an integer with a multiplier which is less than one, using quantized integ...</div><div class="ttdef"><b>Definition:</b> <a href="_ethosn_ref_conv_impl_8hpp_source.xhtml#l00025">EthosnRefConvImpl.hpp:25</a></div></div>
<div class="ttc" id="structarmnn_1_1hw__float_xhtml_abd69797ed098e8001c460f9bd34ec7b6"><div class="ttname"><a href="structarmnn_1_1hw__float.xhtml#abd69797ed098e8001c460f9bd34ec7b6">armnn::hw_float::exp</a></div><div class="ttdeci">uint32_t exp</div><div class="ttdef"><b>Definition:</b> <a href="_ethosn_ref_conv_impl_8hpp_source.xhtml#l00222">EthosnRefConvImpl.hpp:222</a></div></div>
<div class="ttc" id="namespacearmnn_xhtml"><div class="ttname"><a href="namespacearmnn.xhtml">armnn</a></div><div class="ttdoc">Copyright (c) 2021 ARM Limited and Contributors. </div><div class="ttdef"><b>Definition:</b> <a href="01__00__quick__start_8dox_source.xhtml#l00006">01_00_quick_start.dox:6</a></div></div>
<div class="ttc" id="ethosnref_2workloads_2_tensor_buffer_array_view_8hpp_xhtml"><div class="ttname"><a href="ethosnref_2workloads_2_tensor_buffer_array_view_8hpp.xhtml">TensorBufferArrayView.hpp</a></div></div>
<div class="ttc" id="classarmnn_utils_1_1_data_layout_indexed_xhtml_a61c00316c443adc233c24e85c6c5b740"><div class="ttname"><a href="classarmnn_utils_1_1_data_layout_indexed.xhtml#a61c00316c443adc233c24e85c6c5b740">armnnUtils::DataLayoutIndexed::GetHeightIndex</a></div><div class="ttdeci">unsigned int GetHeightIndex() const</div><div class="ttdef"><b>Definition:</b> <a href="_data_layout_indexed_8hpp_source.xhtml#l00024">DataLayoutIndexed.hpp:24</a></div></div>
<div class="ttc" id="_numeric_cast_8hpp_xhtml"><div class="ttname"><a href="_numeric_cast_8hpp.xhtml">NumericCast.hpp</a></div></div>
<div class="ttc" id="namespacearmnn_1_1ethosnref_xhtml_abeb64a06a58ebe59c4a832f29246b550"><div class="ttname"><a href="namespacearmnn_1_1ethosnref.xhtml#abeb64a06a58ebe59c4a832f29246b550">armnn::ethosnref::DivideRoundUp</a></div><div class="ttdeci">uint32_t DivideRoundUp(uint32_t numerator, uint32_t denominator)</div><div class="ttdef"><b>Definition:</b> <a href="_ethosn_ref_workload_utils_8hpp_source.xhtml#l00051">EthosnRefWorkloadUtils.hpp:51</a></div></div>
<div class="ttc" id="classarmnn_1_1_tensor_buffer_array_view_xhtml_ab3a2cf851173535ea07b9d87eaf3ca01"><div class="ttname"><a href="classarmnn_1_1_tensor_buffer_array_view.xhtml#ab3a2cf851173535ea07b9d87eaf3ca01">armnn::TensorBufferArrayView::Get</a></div><div class="ttdeci">DataType &amp; Get(unsigned int b, unsigned int c, unsigned int h, unsigned int w) const</div><div class="ttdef"><b>Definition:</b> <a href="ethosnref_2workloads_2_tensor_buffer_array_view_8hpp_source.xhtml#l00031">TensorBufferArrayView.hpp:31</a></div></div>
<div class="ttc" id="namespacearmnn_1_1ethosnref_xhtml_a0781fc227cbcb3e82fd137bc0f6638cd"><div class="ttname"><a href="namespacearmnn_1_1ethosnref.xhtml#a0781fc227cbcb3e82fd137bc0f6638cd">armnn::ethosnref::GetTensorInfo</a></div><div class="ttdeci">const TensorInfo &amp; GetTensorInfo(const ITensorHandle *tensorHandle)</div><div class="ttdef"><b>Definition:</b> <a href="_ethosn_ref_workload_utils_8hpp_source.xhtml#l00029">EthosnRefWorkloadUtils.hpp:29</a></div></div>
<div class="ttc" id="classarmnn_1_1_tensor_buffer_array_view_xhtml"><div class="ttname"><a href="classarmnn_1_1_tensor_buffer_array_view.xhtml">armnn::TensorBufferArrayView</a></div><div class="ttdef"><b>Definition:</b> <a href="ethosnref_2workloads_2_tensor_buffer_array_view_8hpp_source.xhtml#l00019">TensorBufferArrayView.hpp:19</a></div></div>
<div class="ttc" id="classarmnn_utils_1_1_data_layout_indexed_xhtml"><div class="ttname"><a href="classarmnn_utils_1_1_data_layout_indexed.xhtml">armnnUtils::DataLayoutIndexed</a></div><div class="ttdoc">Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout...</div><div class="ttdef"><b>Definition:</b> <a href="_data_layout_indexed_8hpp_source.xhtml#l00017">DataLayoutIndexed.hpp:17</a></div></div>
<div class="ttc" id="namespacearmnn_xhtml_a8775760e910e27c0fb0e1d7b1cf6d184"><div class="ttname"><a href="namespacearmnn.xhtml#a8775760e910e27c0fb0e1d7b1cf6d184">armnn::bit_extract</a></div><div class="ttdeci">bool bit_extract(const uint8_t pos, const uint32_t val)</div><div class="ttdef"><b>Definition:</b> <a href="_ethosn_ref_conv_impl_8hpp_source.xhtml#l00239">EthosnRefConvImpl.hpp:239</a></div></div>
<div class="ttc" id="namespacearmnn_xhtml_a172004b54c3faafcdba1e63fa2f2eb0c"><div class="ttname"><a href="namespacearmnn.xhtml#a172004b54c3faafcdba1e63fa2f2eb0c">armnn::convert_to_S13E8M</a></div><div class="ttdeci">struct hw_float convert_to_S13E8M(const int32_t val)</div><div class="ttdef"><b>Definition:</b> <a href="_ethosn_ref_conv_impl_8hpp_source.xhtml#l00320">EthosnRefConvImpl.hpp:320</a></div></div>
<div class="ttc" id="structarmnn_1_1exp_xhtml"><div class="ttname"><a href="structarmnn_1_1exp.xhtml">armnn::exp</a></div><div class="ttdef"><b>Definition:</b> <a href="_exp_8hpp_source.xhtml#l00013">Exp.hpp:13</a></div></div>
<div class="ttc" id="classarmnn_1_1_invalid_argument_exception_xhtml"><div class="ttname"><a href="classarmnn_1_1_invalid_argument_exception.xhtml">armnn::InvalidArgumentException</a></div><div class="ttdef"><b>Definition:</b> <a href="_exceptions_8hpp_source.xhtml#l00080">Exceptions.hpp:80</a></div></div>
<div class="ttc" id="namespacearmnn_xhtml_aef93a71fd801d784e24adeb1503ccaea"><div class="ttname"><a href="namespacearmnn.xhtml#aef93a71fd801d784e24adeb1503ccaea">armnn::ShiftRound</a></div><div class="ttdeci">int64_t ShiftRound(int64_t value, bool round=false)</div><div class="ttdef"><b>Definition:</b> <a href="_ethosn_ref_conv_impl_8hpp_source.xhtml#l00363">EthosnRefConvImpl.hpp:363</a></div></div>
<div class="ttc" id="structarmnn_1_1_ethosn_ref_quantized_multiplier_smaller_than_one_xhtml_abcc6c336d5ed521b1d7c0e7b86a28176"><div class="ttname"><a href="structarmnn_1_1_ethosn_ref_quantized_multiplier_smaller_than_one.xhtml#abcc6c336d5ed521b1d7c0e7b86a28176">armnn::EthosnRefQuantizedMultiplierSmallerThanOne::EthosnRefQuantizedMultiplierSmallerThanOne</a></div><div class="ttdeci">EthosnRefQuantizedMultiplierSmallerThanOne(float multiplier)</div><div class="ttdoc">Constructs an EthosnRefQuantizedMultiplierSmallerThanOne which will multiply by the given multiplier...</div><div class="ttdef"><b>Definition:</b> <a href="_ethosn_ref_conv_impl_8cpp_source.xhtml#l00016">EthosnRefConvImpl.cpp:16</a></div></div>
<div class="ttc" id="structarmnn_1_1hw__float_xhtml_a81f962a76560c6bef5d04b53b071e6ef"><div class="ttname"><a href="structarmnn_1_1hw__float.xhtml#a81f962a76560c6bef5d04b53b071e6ef">armnn::hw_float::man</a></div><div class="ttdeci">uint32_t man</div><div class="ttdef"><b>Definition:</b> <a href="_ethosn_ref_conv_impl_8hpp_source.xhtml#l00221">EthosnRefConvImpl.hpp:221</a></div></div>
<div class="ttc" id="namespacearmnn_xhtml_a375ca3cff9f1b005d1412dc5f3cf5b6e"><div class="ttname"><a href="namespacearmnn.xhtml#a375ca3cff9f1b005d1412dc5f3cf5b6e">armnn::numeric_cast</a></div><div class="ttdeci">std::enable_if_t&lt; std::is_unsigned&lt; Source &gt;::value &amp;&amp;std::is_unsigned&lt; Dest &gt;::value, Dest &gt; numeric_cast(Source source)</div><div class="ttdef"><b>Definition:</b> <a href="_numeric_cast_8hpp_source.xhtml#l00035">NumericCast.hpp:35</a></div></div>
<div class="ttc" id="structarmnn_1_1hw__float_xhtml_ac791945aa5d385872f11a441e29fe786"><div class="ttname"><a href="structarmnn_1_1hw__float.xhtml#ac791945aa5d385872f11a441e29fe786">armnn::hw_float::sign</a></div><div class="ttdeci">bool sign</div><div class="ttdef"><b>Definition:</b> <a href="_ethosn_ref_conv_impl_8hpp_source.xhtml#l00220">EthosnRefConvImpl.hpp:220</a></div></div>
<div class="ttc" id="structarmnn_1_1hw__float_xhtml"><div class="ttname"><a href="structarmnn_1_1hw__float.xhtml">armnn::hw_float</a></div><div class="ttdef"><b>Definition:</b> <a href="_ethosn_ref_conv_impl_8hpp_source.xhtml#l00218">EthosnRefConvImpl.hpp:218</a></div></div>
<div class="ttc" id="classarmnn_utils_1_1_data_layout_indexed_xhtml_a861b2621ee46e4b63379988b360b8cd9"><div class="ttname"><a href="classarmnn_utils_1_1_data_layout_indexed.xhtml#a861b2621ee46e4b63379988b360b8cd9">armnnUtils::DataLayoutIndexed::GetChannelsIndex</a></div><div class="ttdeci">unsigned int GetChannelsIndex() const</div><div class="ttdef"><b>Definition:</b> <a href="_data_layout_indexed_8hpp_source.xhtml#l00023">DataLayoutIndexed.hpp:23</a></div></div>
<div class="ttc" id="namespacearmnn_xhtml_ad1d5cce2d9e9a5d61c243e5c989112e0ad066db54b89b0912e7e7c6da51e2da51"><div class="ttname"><a href="namespacearmnn.xhtml#ad1d5cce2d9e9a5d61c243e5c989112e0ad066db54b89b0912e7e7c6da51e2da51">armnn::DataLayout::NHWC</a></div></div>
</div><!-- fragment --></div><!-- contents -->
</div><!-- doc-content -->
<!-- start footer part -->
<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
  <ul>
    <li class="navelem"><a class="el" href="dir_68267d1309a1af8e8297ef4c3efbcdba.xhtml">src</a></li><li class="navelem"><a class="el" href="dir_0f3cdec46afbc61a1ded8e1687c9c9a0.xhtml">backends</a></li><li class="navelem"><a class="el" href="dir_79be26eedbf6c8f349c9502dc5f404e0.xhtml">ethosnref</a></li><li class="navelem"><a class="el" href="dir_d57d9adbd995db1fa77d5557ed93004c.xhtml">workloads</a></li><li class="navelem"><a class="el" href="_ethosn_ref_conv_impl_8hpp.xhtml">EthosnRefConvImpl.hpp</a></li>
    <li class="footer">Generated on Fri Aug 19 2022 14:38:30 for ArmNN by
    <a href="http://www.doxygen.org/index.html">
    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
  </ul>
</div>
</body>
</html>