path: root/src/backends/reference/workloads
author      David Beck <david.beck@arm.com>             2018-09-24 13:18:27 +0100
committer   Matthew Bentham <matthew.bentham@arm.com>   2018-10-10 16:16:57 +0100
commit      b4540bef0b0327683fe8e63f727c1212800dc2a9 (patch)
tree        e1ea8bb6ee981640a1c469ceb556ed648ffde411 /src/backends/reference/workloads
parent      2d9dd36fb6bc20b370701ab15463359b9db35f18 (diff)
download    armnn-b4540bef0b0327683fe8e63f727c1212800dc2a9.tar.gz
IVGCVSW-1898 : Ref backend folder structure
* Reference backend is renamed to backends/reference as per https://confluence.arm.com/display/MLENG/Pluggable+backends

Change-Id: I27a13c274eb60995dfb459e3c49c0e2f60bcd32c
Diffstat (limited to 'src/backends/reference/workloads')
-rw-r--r--  src/backends/reference/workloads/Activation.cpp  91
-rw-r--r--  src/backends/reference/workloads/Activation.hpp  20
-rw-r--r--  src/backends/reference/workloads/ArithmeticFunction.cpp  29
-rw-r--r--  src/backends/reference/workloads/ArithmeticFunction.hpp  24
-rw-r--r--  src/backends/reference/workloads/BatchNormImpl.hpp  56
-rw-r--r--  src/backends/reference/workloads/Broadcast.cpp  33
-rw-r--r--  src/backends/reference/workloads/Broadcast.hpp  58
-rw-r--r--  src/backends/reference/workloads/CMakeLists.txt  102
-rw-r--r--  src/backends/reference/workloads/ConvImpl.cpp  71
-rw-r--r--  src/backends/reference/workloads/ConvImpl.hpp  187
-rw-r--r--  src/backends/reference/workloads/FullyConnected.cpp  62
-rw-r--r--  src/backends/reference/workloads/FullyConnected.hpp  22
-rw-r--r--  src/backends/reference/workloads/Merger.hpp  82
-rw-r--r--  src/backends/reference/workloads/Pooling2d.cpp  241
-rw-r--r--  src/backends/reference/workloads/Pooling2d.hpp  21
-rw-r--r--  src/backends/reference/workloads/RefActivationFloat32Workload.cpp  28
-rw-r--r--  src/backends/reference/workloads/RefActivationFloat32Workload.hpp  20
-rw-r--r--  src/backends/reference/workloads/RefActivationUint8Workload.cpp  38
-rw-r--r--  src/backends/reference/workloads/RefActivationUint8Workload.hpp  21
-rw-r--r--  src/backends/reference/workloads/RefArithmeticWorkload.cpp  69
-rw-r--r--  src/backends/reference/workloads/RefArithmeticWorkload.hpp  122
-rw-r--r--  src/backends/reference/workloads/RefBaseConstantWorkload.cpp  49
-rw-r--r--  src/backends/reference/workloads/RefBaseConstantWorkload.hpp  33
-rw-r--r--  src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.cpp  38
-rw-r--r--  src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.hpp  28
-rw-r--r--  src/backends/reference/workloads/RefBatchNormalizationUint8Workload.cpp  47
-rw-r--r--  src/backends/reference/workloads/RefBatchNormalizationUint8Workload.hpp  28
-rw-r--r--  src/backends/reference/workloads/RefConstantFloat32Workload.cpp  19
-rw-r--r--  src/backends/reference/workloads/RefConstantFloat32Workload.hpp  20
-rw-r--r--  src/backends/reference/workloads/RefConstantUint8Workload.cpp  19
-rw-r--r--  src/backends/reference/workloads/RefConstantUint8Workload.hpp  20
-rw-r--r--  src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp  25
-rw-r--r--  src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp  21
-rw-r--r--  src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp  29
-rw-r--r--  src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp  21
-rw-r--r--  src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp  37
-rw-r--r--  src/backends/reference/workloads/RefConvolution2dFloat32Workload.hpp  27
-rw-r--r--  src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp  45
-rw-r--r--  src/backends/reference/workloads/RefConvolution2dUint8Workload.hpp  28
-rw-r--r--  src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp  37
-rw-r--r--  src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.hpp  27
-rw-r--r--  src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp  46
-rw-r--r--  src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.hpp  26
-rw-r--r--  src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.cpp  42
-rw-r--r--  src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp  21
-rw-r--r--  src/backends/reference/workloads/RefFloorFloat32Workload.cpp  29
-rw-r--r--  src/backends/reference/workloads/RefFloorFloat32Workload.hpp  21
-rw-r--r--  src/backends/reference/workloads/RefFullyConnectedFloat32Workload.cpp  43
-rw-r--r--  src/backends/reference/workloads/RefFullyConnectedFloat32Workload.hpp  26
-rw-r--r--  src/backends/reference/workloads/RefFullyConnectedUint8Workload.cpp  66
-rw-r--r--  src/backends/reference/workloads/RefFullyConnectedUint8Workload.hpp  26
-rw-r--r--  src/backends/reference/workloads/RefL2NormalizationFloat32Workload.cpp  61
-rw-r--r--  src/backends/reference/workloads/RefL2NormalizationFloat32Workload.hpp  21
-rw-r--r--  src/backends/reference/workloads/RefLstmFloat32Workload.cpp  16
-rw-r--r--  src/backends/reference/workloads/RefLstmFloat32Workload.hpp  21
-rw-r--r--  src/backends/reference/workloads/RefMergerFloat32Workload.cpp  21
-rw-r--r--  src/backends/reference/workloads/RefMergerFloat32Workload.hpp  21
-rw-r--r--  src/backends/reference/workloads/RefMergerUint8Workload.cpp  21
-rw-r--r--  src/backends/reference/workloads/RefMergerUint8Workload.hpp  21
-rw-r--r--  src/backends/reference/workloads/RefNormalizationFloat32Workload.cpp  185
-rw-r--r--  src/backends/reference/workloads/RefNormalizationFloat32Workload.hpp  21
-rw-r--r--  src/backends/reference/workloads/RefPermuteWorkload.cpp  32
-rw-r--r--  src/backends/reference/workloads/RefPermuteWorkload.hpp  33
-rw-r--r--  src/backends/reference/workloads/RefPooling2dFloat32Workload.cpp  33
-rw-r--r--  src/backends/reference/workloads/RefPooling2dFloat32Workload.hpp  21
-rw-r--r--  src/backends/reference/workloads/RefPooling2dUint8Workload.cpp  37
-rw-r--r--  src/backends/reference/workloads/RefPooling2dUint8Workload.hpp  21
-rw-r--r--  src/backends/reference/workloads/RefReshapeFloat32Workload.cpp  27
-rw-r--r--  src/backends/reference/workloads/RefReshapeFloat32Workload.hpp  21
-rw-r--r--  src/backends/reference/workloads/RefReshapeUint8Workload.cpp  27
-rw-r--r--  src/backends/reference/workloads/RefReshapeUint8Workload.hpp  21
-rw-r--r--  src/backends/reference/workloads/RefResizeBilinearFloat32Workload.cpp  29
-rw-r--r--  src/backends/reference/workloads/RefResizeBilinearFloat32Workload.hpp  21
-rw-r--r--  src/backends/reference/workloads/RefResizeBilinearUint8Workload.cpp  33
-rw-r--r--  src/backends/reference/workloads/RefResizeBilinearUint8Workload.hpp  21
-rw-r--r--  src/backends/reference/workloads/RefSoftmaxFloat32Workload.cpp  26
-rw-r--r--  src/backends/reference/workloads/RefSoftmaxFloat32Workload.hpp  21
-rw-r--r--  src/backends/reference/workloads/RefSoftmaxUint8Workload.cpp  36
-rw-r--r--  src/backends/reference/workloads/RefSoftmaxUint8Workload.hpp  21
-rw-r--r--  src/backends/reference/workloads/RefSplitterFloat32Workload.cpp  21
-rw-r--r--  src/backends/reference/workloads/RefSplitterFloat32Workload.hpp  21
-rw-r--r--  src/backends/reference/workloads/RefSplitterUint8Workload.cpp  21
-rw-r--r--  src/backends/reference/workloads/RefSplitterUint8Workload.hpp  21
-rw-r--r--  src/backends/reference/workloads/RefWorkloadUtils.hpp  138
-rw-r--r--  src/backends/reference/workloads/RefWorkloads.hpp  53
-rw-r--r--  src/backends/reference/workloads/ResizeBilinear.cpp  92
-rw-r--r--  src/backends/reference/workloads/ResizeBilinear.hpp  15
-rw-r--r--  src/backends/reference/workloads/Softmax.cpp  49
-rw-r--r--  src/backends/reference/workloads/Softmax.hpp  16
-rw-r--r--  src/backends/reference/workloads/Splitter.hpp  84
-rw-r--r--  src/backends/reference/workloads/TensorBufferArrayView.hpp  42
91 files changed, 3804 insertions, 0 deletions
diff --git a/src/backends/reference/workloads/Activation.cpp b/src/backends/reference/workloads/Activation.cpp
new file mode 100644
index 0000000000..ef4903074b
--- /dev/null
+++ b/src/backends/reference/workloads/Activation.cpp
@@ -0,0 +1,91 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "Activation.hpp"
+
+#include <boost/log/trivial.hpp>
+
+#include <cmath>
+
+namespace armnn
+{
+
+void Activation(const float* in,
+ float* out,
+ const TensorInfo& tensorInfo,
+ ActivationFunction function,
+ float a,
+ float b)
+{
+ for (size_t i = 0; i<tensorInfo.GetNumElements(); i++)
+ {
+ float input = in[i];
+ float output;
+
+ // Compute the result of the activation function.
+ switch (function)
+ {
+ case ActivationFunction::Linear:
+ {
+ output = a * input + b;
+ break;
+ }
+ case ActivationFunction::Sigmoid:
+ {
+ output = 1.f / (1.f + expf(-input));
+ break;
+ }
+ case ActivationFunction::ReLu:
+ {
+ output = std::max(0.f, input);
+ break;
+ }
+ case ActivationFunction::BoundedReLu:
+ {
+ output = std::min(a, std::max(b, input));
+ break;
+ }
+ case ActivationFunction::SoftReLu:
+ {
+ output = logf(1.0f + expf(input));
+ break;
+ }
+ case ActivationFunction::LeakyReLu:
+ {
+ output = input > 0.0f ? input : (input * a);
+ break;
+ }
+ case ActivationFunction::Abs:
+ {
+ output = input < 0 ? -input : input;
+ break;
+ }
+ case ActivationFunction::Sqrt:
+ {
+ output = sqrtf(input);
+ break;
+ }
+ case ActivationFunction::Square:
+ {
+ output = input * input;
+ break;
+ }
+ case ActivationFunction::TanH:
+ {
+ output = a * tanhf(b * input);
+ break;
+ }
+ default:
+ {
+ BOOST_LOG_TRIVIAL(error) << "Unsupported activation function";
+ return;
+ }
+ }
+
+ out[i] = output;
+ }
+}
+
+} //namespace armnn
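
Note on the a and b parameters of Activation above: for Linear the result is a*x + b, for BoundedReLu a is the upper bound and b the lower bound, and for TanH the result is a * tanh(b*x). A minimal standalone sketch of just the BoundedReLu clamp, independent of the ArmNN headers (the helper name and the bound values are illustrative, not part of the patch):

    #include <algorithm>
    #include <cassert>

    // Mirrors the BoundedReLu branch above: clamp the input to the range [b, a].
    float BoundedReLu(float input, float a, float b)
    {
        return std::min(a, std::max(b, input));
    }

    int main()
    {
        // ReLu6-style bounds: a = 6 (upper), b = 0 (lower).
        assert(BoundedReLu(-1.0f, 6.0f, 0.0f) == 0.0f);
        assert(BoundedReLu( 3.5f, 6.0f, 0.0f) == 3.5f);
        assert(BoundedReLu( 9.0f, 6.0f, 0.0f) == 6.0f);
        return 0;
    }
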
diff --git a/src/backends/reference/workloads/Activation.hpp b/src/backends/reference/workloads/Activation.hpp
new file mode 100644
index 0000000000..c8a23114f0
--- /dev/null
+++ b/src/backends/reference/workloads/Activation.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <armnn/Tensor.hpp>
+#include <armnn/Types.hpp>
+
+namespace armnn
+{
+
+/// Performs the ActivationFunction elementwise on the inputs to give the outputs.
+void Activation(const float* in,
+ float* out,
+ const TensorInfo& tensorInfo,
+ ActivationFunction function,
+ float a,
+ float b);
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/ArithmeticFunction.cpp b/src/backends/reference/workloads/ArithmeticFunction.cpp
new file mode 100644
index 0000000000..fede138253
--- /dev/null
+++ b/src/backends/reference/workloads/ArithmeticFunction.cpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ArithmeticFunction.hpp"
+#include "Broadcast.hpp"
+#include <functional>
+
+namespace armnn
+{
+
+template <typename Functor>
+ArithmeticFunction<Functor>::ArithmeticFunction(const TensorShape& inShape0,
+ const TensorShape& inShape1,
+ const TensorShape& outShape,
+ const float* inData0,
+ const float* inData1,
+ float* outData)
+{
+ BroadcastLoop(inShape0, inShape1, outShape).Unroll(Functor(), 0, inData0, inData1, outData);
+}
+
+} //namespace armnn
+
+template struct armnn::ArithmeticFunction<std::plus<float>>;
+template struct armnn::ArithmeticFunction<std::minus<float>>;
+template struct armnn::ArithmeticFunction<std::multiplies<float>>;
+template struct armnn::ArithmeticFunction<std::divides<float>>;
diff --git a/src/backends/reference/workloads/ArithmeticFunction.hpp b/src/backends/reference/workloads/ArithmeticFunction.hpp
new file mode 100644
index 0000000000..eafb6444f6
--- /dev/null
+++ b/src/backends/reference/workloads/ArithmeticFunction.hpp
@@ -0,0 +1,24 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+template <typename Functor>
+struct ArithmeticFunction
+{
+ ArithmeticFunction(const TensorShape& inShape0,
+ const TensorShape& inShape1,
+ const TensorShape& outShape,
+ const float* inData0,
+ const float* inData1,
+ float* outData);
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/BatchNormImpl.hpp b/src/backends/reference/workloads/BatchNormImpl.hpp
new file mode 100644
index 0000000000..a7579c8373
--- /dev/null
+++ b/src/backends/reference/workloads/BatchNormImpl.hpp
@@ -0,0 +1,56 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "RefWorkloadUtils.hpp"
+
+#include <armnn/Tensor.hpp>
+
+#include <cmath>
+
+namespace armnn
+{
+
+template<typename NormData>
+static void BatchNormImpl(NormData data,
+ const float* varIn,
+ const float* meanIn,
+ const float* gammaIn,
+ const float* betaIn,
+ float * outputData,
+ const float * inputData)
+{
+ const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]);
+ for (unsigned int c = 0; c < inputInfo0.GetShape()[1]; c++)
+ {
+ float var = varIn[c];
+ float mean = meanIn[c];
+ float gamma = gammaIn[c];
+ float beta = betaIn[c];
+
+ float mult = gamma / sqrtf(var + data.m_Parameters.m_Eps);
+ float add = beta - mult * mean;
+
+ for (unsigned int n = 0; n < inputInfo0.GetShape()[0]; n++)
+ {
+ for (unsigned int j = 0; j < inputInfo0.GetShape()[2]; j++)
+ {
+ for (unsigned int i = 0; i < inputInfo0.GetShape()[3]; i++)
+ {
+ unsigned int index = i +
+ j*inputInfo0.GetShape()[3] +
+ c*inputInfo0.GetShape()[3] * inputInfo0.GetShape()[2] +
+ n*inputInfo0.GetShape()[3] * inputInfo0.GetShape()[2]
+ * inputInfo0.GetShape()[1];
+
+ outputData[index] = mult * inputData[index] + add;
+ }
+ }
+ }
+ }
+}
+
+} //namespace armnn
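
BatchNormImpl above folds the per-channel normalization gamma * (x - mean) / sqrt(var + eps) + beta into a single multiply-add, mult * x + add, computed once per channel. A short standalone check of that algebra (the values are illustrative and the sketch does not use the ArmNN types):

    #include <cassert>
    #include <cmath>

    int main()
    {
        // Illustrative per-channel parameters.
        const float x = 2.0f, mean = 1.0f, var = 4.0f, gamma = 0.5f, beta = 0.25f, eps = 1e-5f;

        // Direct form of batch normalization.
        const float direct = gamma * (x - mean) / std::sqrt(var + eps) + beta;

        // Folded form used by BatchNormImpl above.
        const float mult = gamma / std::sqrt(var + eps);
        const float add  = beta - mult * mean;
        const float folded = mult * x + add;

        assert(std::fabs(direct - folded) < 1e-6f);
        return 0;
    }
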
diff --git a/src/backends/reference/workloads/Broadcast.cpp b/src/backends/reference/workloads/Broadcast.cpp
new file mode 100644
index 0000000000..8421a0a7ed
--- /dev/null
+++ b/src/backends/reference/workloads/Broadcast.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "Broadcast.hpp"
+
+namespace armnn
+{
+
+BroadcastLoop::BroadcastLoop(const TensorShape& inShape0, const TensorShape& inShape1, const TensorShape& outShape)
+: m_DimData(outShape.GetNumDimensions())
+{
+ const unsigned int numDims = GetNumDimensions();
+
+ unsigned int sIn0 = 1;
+ unsigned int sIn1 = 1;
+ unsigned int sOut = 1;
+
+ for (unsigned int j = numDims - 1, k = 0; k < numDims ; k++, j--)
+ {
+ m_DimData[j].m_DimSize = outShape[j];
+ m_DimData[j].m_Stride1 = (inShape0[j] > 1) ? sIn0 : 0;
+ m_DimData[j].m_Stride2 = (inShape1[j] > 1) ? sIn1 : 0;
+ m_DimData[j].m_StrideOut = sOut;
+
+ sIn0 *= inShape0[j];
+ sIn1 *= inShape1[j];
+ sOut *= outShape[j];
+ }
+}
+
+} // namespace armnn
diff --git a/src/backends/reference/workloads/Broadcast.hpp b/src/backends/reference/workloads/Broadcast.hpp
new file mode 100644
index 0000000000..e92ed0598d
--- /dev/null
+++ b/src/backends/reference/workloads/Broadcast.hpp
@@ -0,0 +1,58 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <armnn/Tensor.hpp>
+
+#include <functional>
+
+namespace armnn
+{
+
+struct BroadcastLoop
+{
+ BroadcastLoop(const TensorShape& inShape0, const TensorShape& inShape1, const TensorShape& outShape);
+
+ unsigned int GetNumDimensions()
+ {
+ return static_cast<unsigned int>(m_DimData.size());
+ }
+
+ template <typename T0, typename T1, typename U, typename Func>
+ void Unroll(Func operationFunc,
+ unsigned int dimension,
+ const T0* inData0,
+ const T1* inData1,
+ U* outData)
+ {
+ if (dimension >= GetNumDimensions())
+ {
+ *outData = operationFunc(*inData0, *inData1);
+ return;
+ }
+
+ for (unsigned int i = 0; i < m_DimData[dimension].m_DimSize; i++)
+ {
+ Unroll(operationFunc, dimension + 1, inData0, inData1, outData);
+
+ inData0 += m_DimData[dimension].m_Stride1;
+ inData1 += m_DimData[dimension].m_Stride2;
+ outData += m_DimData[dimension].m_StrideOut;
+ }
+ }
+
+private:
+ // Struct to hold the dimension data.
+ struct BroadcastDimensionData
+ {
+ unsigned int m_DimSize;
+ unsigned int m_StrideOut;
+ unsigned int m_Stride1;
+ unsigned int m_Stride2;
+ };
+
+ std::vector<BroadcastDimensionData> m_DimData;
+};
+
+} //namespace armnn
\ No newline at end of file
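
BroadcastLoop implements broadcasting by assigning a stride of zero to any input dimension of size 1, so that input value is re-read while the other operands advance. A standalone sketch of the same stride trick for shapes {1, 3} and {2, 1} producing a {2, 3} output (plain arrays stand in for the ArmNN TensorShape type; values are illustrative):

    #include <cstdio>

    int main()
    {
        // Shapes (outermost first): in0 = {1, 3}, in1 = {2, 1}, out = {2, 3}.
        const float in0[3] = { 1.0f, 2.0f, 3.0f };    // Broadcast along dim 0.
        const float in1[2] = { 10.0f, 20.0f };        // Broadcast along dim 1.
        float out[6];

        // Strides: 0 where the input dimension is 1, otherwise the dense stride.
        const unsigned int stride0[2] = { 0, 1 };     // in0: dim 0 has size 1.
        const unsigned int stride1[2] = { 1, 0 };     // in1: dim 1 has size 1.

        for (unsigned int i = 0; i < 2; ++i)
        {
            for (unsigned int j = 0; j < 3; ++j)
            {
                out[i * 3 + j] = in0[i * stride0[0] + j * stride0[1]]
                               + in1[i * stride1[0] + j * stride1[1]];
            }
        }

        for (float v : out) { std::printf("%.1f ", v); }   // prints 11.0 12.0 13.0 21.0 22.0 23.0
        std::printf("\n");
        return 0;
    }

The Unroll recursion above generalizes this to any number of dimensions by carrying one stride triple per dimension.
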
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
new file mode 100644
index 0000000000..7343b70daf
--- /dev/null
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -0,0 +1,102 @@
+#
+# Copyright © 2017 Arm Ltd. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+
+list(APPEND armnnRefBackendWorkloads_sources
+ Activation.cpp
+ Activation.hpp
+ ArithmeticFunction.cpp
+ ArithmeticFunction.hpp
+ BatchNormImpl.hpp
+ Broadcast.cpp
+ Broadcast.hpp
+ ConvImpl.cpp
+ ConvImpl.hpp
+ FullyConnected.cpp
+ FullyConnected.hpp
+ Merger.hpp
+ Pooling2d.cpp
+ Pooling2d.hpp
+ RefActivationFloat32Workload.cpp
+ RefActivationFloat32Workload.hpp
+ RefActivationUint8Workload.cpp
+ RefActivationUint8Workload.hpp
+ RefArithmeticWorkload.cpp
+ RefArithmeticWorkload.hpp
+ RefBaseConstantWorkload.cpp
+ RefBaseConstantWorkload.hpp
+ RefBatchNormalizationFloat32Workload.cpp
+ RefBatchNormalizationFloat32Workload.hpp
+ RefBatchNormalizationUint8Workload.cpp
+ RefBatchNormalizationUint8Workload.hpp
+ RefConstantFloat32Workload.cpp
+ RefConstantFloat32Workload.hpp
+ RefConstantUint8Workload.cpp
+ RefConstantUint8Workload.hpp
+ RefConvertFp16ToFp32Workload.cpp
+ RefConvertFp16ToFp32Workload.hpp
+ RefConvertFp32ToFp16Workload.cpp
+ RefConvertFp32ToFp16Workload.hpp
+ RefConvolution2dFloat32Workload.cpp
+ RefConvolution2dFloat32Workload.hpp
+ RefConvolution2dUint8Workload.cpp
+ RefConvolution2dUint8Workload.hpp
+ RefDepthwiseConvolution2dFloat32Workload.cpp
+ RefDepthwiseConvolution2dFloat32Workload.hpp
+ RefDepthwiseConvolution2dUint8Workload.cpp
+ RefDepthwiseConvolution2dUint8Workload.hpp
+ RefFakeQuantizationFloat32Workload.cpp
+ RefFakeQuantizationFloat32Workload.hpp
+ RefFloorFloat32Workload.cpp
+ RefFloorFloat32Workload.hpp
+ RefFullyConnectedFloat32Workload.cpp
+ RefFullyConnectedFloat32Workload.hpp
+ RefFullyConnectedUint8Workload.cpp
+ RefFullyConnectedUint8Workload.hpp
+ RefL2NormalizationFloat32Workload.cpp
+ RefL2NormalizationFloat32Workload.hpp
+ RefLstmFloat32Workload.cpp
+ RefLstmFloat32Workload.hpp
+ RefMergerFloat32Workload.cpp
+ RefMergerFloat32Workload.hpp
+ RefMergerUint8Workload.cpp
+ RefMergerUint8Workload.hpp
+ RefNormalizationFloat32Workload.cpp
+ RefNormalizationFloat32Workload.hpp
+ RefPermuteWorkload.cpp
+ RefPermuteWorkload.hpp
+ RefPooling2dFloat32Workload.cpp
+ RefPooling2dFloat32Workload.hpp
+ RefPooling2dUint8Workload.cpp
+ RefPooling2dUint8Workload.hpp
+ RefReshapeFloat32Workload.cpp
+ RefReshapeFloat32Workload.hpp
+ RefReshapeUint8Workload.cpp
+ RefReshapeUint8Workload.hpp
+ RefResizeBilinearFloat32Workload.cpp
+ RefResizeBilinearFloat32Workload.hpp
+ RefResizeBilinearUint8Workload.cpp
+ RefResizeBilinearUint8Workload.hpp
+ RefSoftmaxFloat32Workload.cpp
+ RefSoftmaxFloat32Workload.hpp
+ RefSoftmaxUint8Workload.cpp
+ RefSoftmaxUint8Workload.hpp
+ RefSplitterFloat32Workload.cpp
+ RefSplitterFloat32Workload.hpp
+ RefSplitterUint8Workload.cpp
+ RefSplitterUint8Workload.hpp
+ RefWorkloads.hpp
+ RefWorkloadUtils.hpp
+ ResizeBilinear.cpp
+ ResizeBilinear.hpp
+ Softmax.cpp
+ Softmax.hpp
+ Splitter.hpp
+ TensorBufferArrayView.hpp
+)
+
+add_library(armnnRefBackendWorkloads STATIC ${armnnRefBackendWorkloads_sources})
+target_include_directories(armnnRefBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src)
+target_include_directories(armnnRefBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn)
+target_include_directories(armnnRefBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils)
diff --git a/src/backends/reference/workloads/ConvImpl.cpp b/src/backends/reference/workloads/ConvImpl.cpp
new file mode 100644
index 0000000000..8743a2bd0d
--- /dev/null
+++ b/src/backends/reference/workloads/ConvImpl.cpp
@@ -0,0 +1,71 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ConvImpl.hpp"
+
+#include <boost/assert.hpp>
+
+#include <cmath>
+#include <limits>
+
+namespace armnn
+{
+
+QuantizedMultiplierSmallerThanOne::QuantizedMultiplierSmallerThanOne(float multiplier)
+{
+ BOOST_ASSERT(multiplier >= 0.0f && multiplier < 1.0f);
+ if (multiplier == 0.0f)
+ {
+ m_Multiplier = 0;
+ m_RightShift = 0;
+ }
+ else
+ {
+ const double q = std::frexp(multiplier, &m_RightShift);
+ m_RightShift = -m_RightShift;
+ int64_t qFixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
+ BOOST_ASSERT(qFixed <= (1ll << 31));
+ if (qFixed == (1ll << 31))
+ {
+ qFixed /= 2;
+ --m_RightShift;
+ }
+ BOOST_ASSERT(m_RightShift >= 0);
+ BOOST_ASSERT(qFixed <= std::numeric_limits<int32_t>::max());
+ m_Multiplier = static_cast<int32_t>(qFixed);
+ }
+}
+
+int32_t QuantizedMultiplierSmallerThanOne::operator*(int32_t rhs) const
+{
+ int32_t x = SaturatingRoundingDoublingHighMul(rhs, m_Multiplier);
+ return RoundingDivideByPOT(x, m_RightShift);
+}
+
+int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
+{
+ // Check for overflow.
+ if (a == b && a == std::numeric_limits<int32_t>::min())
+ {
+ return std::numeric_limits<int32_t>::max();
+ }
+ int64_t a_64(a);
+ int64_t b_64(b);
+ int64_t ab_64 = a_64 * b_64;
+ int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
+ int32_t ab_x2_high32 = static_cast<std::int32_t>((ab_64 + nudge) / (1ll << 31));
+ return ab_x2_high32;
+}
+
+int32_t QuantizedMultiplierSmallerThanOne::RoundingDivideByPOT(int32_t x, int exponent)
+{
+ BOOST_ASSERT(exponent >= 0 && exponent <= 31);
+ int32_t mask = (1 << exponent) - 1;
+ int32_t remainder = x & mask;
+ int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
+ return (x >> exponent) + (remainder > threshold ? 1 : 0);
+}
+
+} //namespace armnn
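
QuantizedMultiplierSmallerThanOne represents a real multiplier in [0, 1) as a Q31 fixed-point value plus a right shift, i.e. multiplier is approximately (m_Multiplier / 2^31) * 2^-m_RightShift. A minimal standalone sketch of that decomposition using std::frexp, mirroring the constructor above but omitting the saturation edge cases (the variable names and the sample value are illustrative):

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        // Decompose a real multiplier in [0, 1):
        // multiplier ~= (quantized / 2^31) * 2^-rightShift
        const float multiplier = 0.1f;

        int exponent = 0;
        const double q = std::frexp(multiplier, &exponent);   // q in [0.5, 1), multiplier = q * 2^exponent
        const int rightShift = -exponent;
        const int64_t quantized = static_cast<int64_t>(std::round(q * (1ll << 31)));

        const double reconstructed =
            (static_cast<double>(quantized) / (1ll << 31)) * std::pow(2.0, -rightShift);

        std::printf("multiplier=%f reconstructed=%f shift=%d\n", multiplier, reconstructed, rightShift);
        return 0;
    }

The constructor above additionally handles the case where the rounded value reaches 2^31 by halving it and decrementing the shift.
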
diff --git a/src/backends/reference/workloads/ConvImpl.hpp b/src/backends/reference/workloads/ConvImpl.hpp
new file mode 100644
index 0000000000..4c9ab2a644
--- /dev/null
+++ b/src/backends/reference/workloads/ConvImpl.hpp
@@ -0,0 +1,187 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "RefWorkloadUtils.hpp"
+
+#include <armnn/Tensor.hpp>
+
+#include <boost/assert.hpp>
+#include <boost/numeric/conversion/cast.hpp>
+
+#include <cmath>
+#include <limits>
+
+namespace armnn
+{
+
+/// Performs multiplication of an integer with a multiplier which is less than one,
+/// using quantized integer arithmetic which is consistent with AndroidNN's CPU executor.
+struct QuantizedMultiplierSmallerThanOne
+{
+public:
+ /// Constructs a QuantizedMultiplierSmallerThanOne which will multiply by the given multiplier.
+ /// This stores the appropriate integer quantities (derived from the given multiplier) for later use.
+ /// The implementation of this function is adapted from Android NN's QuantizeMultiplierSmallerThanOne().
+ QuantizedMultiplierSmallerThanOne(float multiplier);
+
+ /// The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne().
+ int32_t operator*(int32_t rhs) const;
+
+private:
+ /// The implementation of this function is adapted from gemmlowp's SaturatingRoundingDoublingHighMul().
+ static int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b);
+
+ /// The implementation of this function is adapted from gemmlowp's RoundingDivideByPOT().
+ static int32_t RoundingDivideByPOT(int32_t x, int exponent);
+
+ int32_t m_Multiplier;
+ int32_t m_RightShift;
+};
+
+/// An implementation shared by normal and depthwise convolution.
+template<typename ConvData, typename InputType, typename BiasType, typename AccumulatorType>
+static void ConvImpl(ConvData data,
+ const InputType* inputData,
+ float inputScale,
+ int32_t inputOffset,
+ const InputType* filterData,
+ float filterScale,
+ int32_t filterOffset,
+ const BiasType* biasData,
+ InputType* outputData,
+ float outputScale,
+ int32_t outputOffset,
+ const TensorInfo& filterInfo,
+ bool depthwise = false)
+{
+ if (data.m_Parameters.m_BiasEnabled && !biasData)
+ {
+ throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
+ }
+
+ const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]);
+ const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]);
+
+ unsigned int depthMult = depthwise ? filterInfo.GetShape()[0] : 1;
+ unsigned int channelsInput = filterInfo.GetShape()[1];
+ unsigned int channelsOutput = depthwise ? channelsInput * depthMult : filterInfo.GetShape()[0];
+
+ unsigned int batchSize = outputInfo0.GetShape()[0];
+ unsigned int heightOutput = outputInfo0.GetShape()[2];
+ unsigned int widthOutput = outputInfo0.GetShape()[3];
+ unsigned int heightInput = inputInfo0.GetShape()[2];
+ unsigned int widthInput = inputInfo0.GetShape()[3];
+
+ unsigned int heightFilter = filterInfo.GetShape()[2];
+ unsigned int widthFilter = filterInfo.GetShape()[3];
+
+ unsigned int paddingTop = data.m_Parameters.m_PadTop;
+ unsigned int paddingLeft = data.m_Parameters.m_PadLeft;
+ unsigned int hStride = data.m_Parameters.m_StrideY;
+ unsigned int xStride = data.m_Parameters.m_StrideX;
+
+ // The world's least efficient convolution.
+ for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
+ {
+ for (unsigned int cOutput = 0; cOutput < channelsOutput; cOutput++)
+ {
+ for (unsigned int yOutput = 0; yOutput < heightOutput; yOutput++)
+ {
+ for (unsigned int xOutput = 0; xOutput < widthOutput; xOutput++)
+ {
+ // This loop goes over each output element.
+ AccumulatorType sum = AccumulatorType();
+
+ // For depthwise, each output channel corresponds to exactly one input channel.
+ // For normal, must loop over each input channel.
+ for (unsigned int cInput = 0; cInput < (depthwise ? 1 : channelsInput); cInput++)
+ {
+ unsigned int depthwiseMultiplierIdx = 0;
+ if (depthwise)
+ {
+ cInput = cOutput / depthMult;
+ depthwiseMultiplierIdx = cOutput % depthMult;
+ }
+
+ for (unsigned int yFilter = 0; yFilter < heightFilter; yFilter++)
+ {
+ for (unsigned int xFilter = 0; xFilter < widthFilter; xFilter++)
+ {
+ // This loop goes over each input element for each output element.
+
+ unsigned int filterIndex;
+
+                                // The kernel layout depends on whether the convolution is depthwise, and so does the index.

+ if (depthwise)
+ {
+ filterIndex = depthwiseMultiplierIdx * widthFilter * heightFilter * channelsInput +
+ cInput * widthFilter * heightFilter +
+ yFilter * widthFilter +
+ xFilter;
+ }
+ else
+ {
+ filterIndex = cOutput * widthFilter * heightFilter * channelsInput +
+ cInput * widthFilter * heightFilter +
+ yFilter * widthFilter +
+ xFilter;
+ }
+ AccumulatorType filterValue = filterData[filterIndex] -
+ boost::numeric_cast<AccumulatorType>(filterOffset);
+
+ unsigned int yInput = yOutput * hStride + yFilter;
+ unsigned int xInput = xOutput * xStride + xFilter;
+
+ AccumulatorType inputValue;
+
+ // Check if we're in the padding.
+ if (yInput < paddingTop || yInput >= heightInput + paddingTop ||
+ xInput < paddingLeft || xInput >= widthInput + paddingLeft )
+ {
+ inputValue = AccumulatorType();
+ }
+ else
+ {
+ inputValue = inputData[batchIdx * widthInput * heightInput * channelsInput +
+ widthInput * heightInput * cInput +
+ widthInput * (yInput - paddingTop) +
+ xInput - paddingLeft] -
+ boost::numeric_cast<AccumulatorType>(inputOffset);
+ }
+ sum += filterValue * inputValue;
+ }
+ }
+ }
+
+ if (data.m_Parameters.m_BiasEnabled)
+ {
+ sum += biasData[cOutput];
+ }
+
+ if (outputScale != 0.0f)
+ {
+ float multiplier = (inputScale * filterScale) / outputScale;
+ // Apply the multiplier to sum, but do so using some quantized arithmetic which is consistent
+ // with the AndroidNN CPU implementation. This should be (roughly) equivalent to:
+ // sum = std::round(multiplier * sum + outputOffset);
+ sum = boost::numeric_cast<AccumulatorType>(
+ QuantizedMultiplierSmallerThanOne(multiplier) * boost::numeric_cast<int32_t>(sum))
+ + boost::numeric_cast<AccumulatorType>(outputOffset);
+ sum = std::min<AccumulatorType>(std::max<AccumulatorType>(sum, 0), 255);
+ }
+
+ outputData[batchIdx * widthOutput * heightOutput * channelsOutput +
+ widthOutput * heightOutput * cOutput +
+ widthOutput * yOutput +
+ xOutput] = boost::numeric_cast<InputType>(sum);
+ }
+ }
+ }
+ }
+}
+
+} //namespace armnn
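
The reference convolution above addresses its NCHW tensors with manually expanded flat offsets of the form n*C*H*W + c*H*W + y*W + x. A compact standalone helper expressing the same layout, which may help when reading the index arithmetic (the helper name is illustrative, not part of the patch):

    #include <cassert>

    // Flat offset into an NCHW tensor, matching the index arithmetic used above:
    // offset = n*C*H*W + c*H*W + y*W + x
    unsigned int NchwOffset(unsigned int n, unsigned int c, unsigned int y, unsigned int x,
                            unsigned int C, unsigned int H, unsigned int W)
    {
        return ((n * C + c) * H + y) * W + x;
    }

    int main()
    {
        // A 2x3x4x5 tensor: element (1, 2, 3, 4) is the last one.
        assert(NchwOffset(1, 2, 3, 4, 3, 4, 5) == 2 * 3 * 4 * 5 - 1);
        return 0;
    }
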
diff --git a/src/backends/reference/workloads/FullyConnected.cpp b/src/backends/reference/workloads/FullyConnected.cpp
new file mode 100644
index 0000000000..bf5814d2ad
--- /dev/null
+++ b/src/backends/reference/workloads/FullyConnected.cpp
@@ -0,0 +1,62 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "FullyConnected.hpp"
+
+#include <boost/assert.hpp>
+
+namespace armnn
+{
+
+void FullyConnected(const float* inputData,
+ float* outputData,
+ const TensorInfo& inputTensorInfo,
+ const TensorInfo& outputTensorInfo,
+ const float* weightData,
+ const float* biasData,
+ bool transposeWeights)
+{
+ unsigned int N = outputTensorInfo.GetShape()[1]; // Outputs Vector Size.
+
+ BOOST_ASSERT(inputTensorInfo.GetNumDimensions() > 1); // Needs some data.
+
+ unsigned int K = 1; // Total number of activations in the input.
+ for (unsigned int i = 1; i < inputTensorInfo.GetNumDimensions(); i++)
+ {
+ K *= inputTensorInfo.GetShape()[i];
+ }
+
+ for (unsigned int n = 0; n < inputTensorInfo.GetShape()[0]; n++)
+ {
+ for (unsigned int channelOutput = 0; channelOutput < N; channelOutput++)
+ {
+ float outval = 0.f;
+
+ for (unsigned int channelInput = 0; channelInput < K; channelInput++)
+ {
+ float weight;
+ if (transposeWeights)
+ {
+ weight = weightData[channelOutput * K + channelInput];
+ }
+ else
+ {
+ weight = weightData[channelInput * N + channelOutput];
+ }
+
+ outval += weight * inputData[n * K + channelInput];
+ }
+
+ if (biasData)
+ {
+ outval += biasData[channelOutput];
+ }
+
+ outputData[n * N + channelOutput] = outval;
+ }
+ }
+}
+
+} //namespace armnn
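
FullyConnected computes output[n][o] as the sum over k of W(o, k) * input[n][k], plus an optional bias; transposeWeights only selects whether W is stored row-major as [N, K] (true) or as [K, N] (false). A tiny standalone check that the two indexing schemes above address the same logical matrix (the values are illustrative):

    #include <cassert>

    int main()
    {
        // One batch, K = 2 inputs, N = 2 outputs; W(o, k) = { {1, 2}, {3, 4} }.
        const float input[2] = { 5.0f, 6.0f };

        const float weightsNK[4] = { 1.0f, 2.0f, 3.0f, 4.0f };   // [N][K], i.e. transposeWeights == true
        const float weightsKN[4] = { 1.0f, 3.0f, 2.0f, 4.0f };   // [K][N], i.e. transposeWeights == false

        for (unsigned int o = 0; o < 2; ++o)
        {
            float a = 0.0f, b = 0.0f;
            for (unsigned int k = 0; k < 2; ++k)
            {
                a += weightsNK[o * 2 + k] * input[k];   // indexing used when transposeWeights is true
                b += weightsKN[k * 2 + o] * input[k];   // indexing used when transposeWeights is false
            }
            assert(a == b);                             // both layouts give outputs {17, 39}
        }
        return 0;
    }
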
diff --git a/src/backends/reference/workloads/FullyConnected.hpp b/src/backends/reference/workloads/FullyConnected.hpp
new file mode 100644
index 0000000000..623259f8f8
--- /dev/null
+++ b/src/backends/reference/workloads/FullyConnected.hpp
@@ -0,0 +1,22 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+/// Performs a matrix multiplication and optionally adds a bias.
+void FullyConnected(const float* inputData,
+ float* outputData,
+ const TensorInfo& inputTensorInfo,
+ const TensorInfo& outputTensorInfo,
+ const float* weightData,
+ const float* biasData,
+ bool transposeWeights);
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/Merger.hpp b/src/backends/reference/workloads/Merger.hpp
new file mode 100644
index 0000000000..867925faa2
--- /dev/null
+++ b/src/backends/reference/workloads/Merger.hpp
@@ -0,0 +1,82 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "RefWorkloadUtils.hpp"
+
+#include "backends/WorkloadData.hpp"
+
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+template <typename DataType>
+void Merger(const MergerQueueDescriptor& data)
+{
+ const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]);
+
+ for (unsigned int index = 0 ; index < outputInfo0.GetNumElements(); ++index)
+ {
+ unsigned int indices[MaxNumOfTensorDimensions] = { 0 };
+
+ unsigned int indexRemainder = index;
+ unsigned int dimensionStride = outputInfo0.GetNumElements();
+
+ for (unsigned int i=0; i<outputInfo0.GetNumDimensions(); i++)
+ {
+ dimensionStride /= outputInfo0.GetShape()[i];
+ indices[i] = indexRemainder / dimensionStride; // Use integer division to round down.
+ indexRemainder -= indices[i] * dimensionStride;
+ }
+
+ for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx)
+ {
+ MergerQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
+
+ //Split view extents are defined by the size of (the corresponding) input tensor.
+ const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[viewIdx]);
+ BOOST_ASSERT(inputInfo.GetNumDimensions() == outputInfo0.GetNumDimensions());
+
+ // Check all dimensions to see if this element is inside the given input view.
+ bool insideView = true;
+ for (unsigned int i=0; i<inputInfo.GetNumDimensions(); i++)
+ {
+ if (indices[i] < view.m_Origin[i])
+ {
+ insideView = false;
+ }
+ if (indices[i] >= view.m_Origin[i] + inputInfo.GetShape()[i])
+ {
+ insideView = false;
+ }
+ }
+
+ if (insideView)
+ {
+ unsigned int inIndex = 0;
+ unsigned int dimensionStride = 1;
+
+ for (unsigned int i = inputInfo.GetNumDimensions(); i-- > 0;)
+ {
+ inIndex += dimensionStride * (indices[i] - view.m_Origin[i]);
+ dimensionStride *= inputInfo.GetShape()[i];
+ }
+
+ //We are within the view, copy input data to the output corresponding to this view.
+ (GetOutputTensorData<DataType>(0, data))[index] =
+ (GetInputTensorData<DataType>(viewIdx, data))[inIndex];
+
+ //What should we do if input views overlap on the output tensor?
+            //We could error, take the average, or something else...
+            //For now just stop after finding the first view (input) that matches.
+ break;
+ }
+ }
+ }
+}
+
+} //namespace armnn
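
The Merger loop above converts each flat output index into per-dimension coordinates by repeatedly dividing by the remaining dimension stride, then maps those coordinates into whichever input view contains them. A standalone sketch of just the index decomposition for a {2, 3, 4} shape (the shape and the flat index are illustrative):

    #include <cassert>

    int main()
    {
        const unsigned int shape[3] = { 2, 3, 4 };
        const unsigned int numElements = 2 * 3 * 4;

        // Decompose flat index 17 the same way as the Merger loop above.
        unsigned int indices[3] = { 0, 0, 0 };
        unsigned int indexRemainder = 17;
        unsigned int dimensionStride = numElements;

        for (unsigned int i = 0; i < 3; ++i)
        {
            dimensionStride /= shape[i];
            indices[i] = indexRemainder / dimensionStride;
            indexRemainder -= indices[i] * dimensionStride;
        }

        // 17 = 1*12 + 1*4 + 1  ->  coordinates (1, 1, 1)
        assert(indices[0] == 1 && indices[1] == 1 && indices[2] == 1);
        return 0;
    }
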
diff --git a/src/backends/reference/workloads/Pooling2d.cpp b/src/backends/reference/workloads/Pooling2d.cpp
new file mode 100644
index 0000000000..5812a290e7
--- /dev/null
+++ b/src/backends/reference/workloads/Pooling2d.cpp
@@ -0,0 +1,241 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "Pooling2d.hpp"
+
+#include <armnn/Exceptions.hpp>
+#include <armnn/Types.hpp>
+
+#include <boost/numeric/conversion/cast.hpp>
+
+#include <limits>
+#include <algorithm>
+#include <functional>
+
+namespace
+{
+ using PoolingAlgorithm = armnn::PoolingAlgorithm;
+
+ float DefaultInitializer(PoolingAlgorithm algorithm)
+ {
+ switch (algorithm)
+ {
+ case PoolingAlgorithm::Max:
+ {
+ return std::numeric_limits<float>::lowest();
+ }
+ case PoolingAlgorithm::Average:
+ case PoolingAlgorithm::L2:
+ {
+ return 0.0f;
+ }
+ default:
+ {
+ throw armnn::InvalidArgumentException("Unsupported pooling algorithm");
+ }
+ }
+ }
+
+ using Accumulator = std::function<void(float & accu, float value)>;
+
+ Accumulator GetAccumulator(PoolingAlgorithm algorithm)
+ {
+ switch (algorithm)
+ {
+ case PoolingAlgorithm::Max:
+ {
+ return [](float & accu, float value) {
+ if (value > accu) {
+ accu = value;
+ }
+ };
+ }
+
+ case PoolingAlgorithm::Average:
+ {
+ return [](float & accu, float value) {
+ accu += value;
+ };
+ }
+
+ case PoolingAlgorithm::L2:
+ {
+ return [](float & accu, float value) {
+ accu += (value*value);
+ };
+ }
+
+ default:
+ {
+ throw armnn::InvalidArgumentException("Unsupported pooling algorithm");
+ }
+ }
+ }
+
+ using Executor = std::function<void(float & accumulated, float kernelSize)>;
+
+ Executor GetExecutor(PoolingAlgorithm algorithm)
+ {
+ switch (algorithm)
+ {
+ case PoolingAlgorithm::Max:
+ {
+ return [](float & accumulated, float kernelSize) {};
+ }
+
+ case PoolingAlgorithm::Average:
+ {
+ return [](float & accumulated, float kernelSize) {
+ accumulated /= kernelSize;
+ };
+ }
+
+ case PoolingAlgorithm::L2:
+ {
+ return [](float & accumulated, float kernelSize) {
+ accumulated = sqrtf(accumulated / kernelSize);
+ };
+ }
+
+ default:
+ {
+ throw armnn::InvalidArgumentException("Unsupported pooling algorithm");
+ }
+ }
+ }
+
+ bool OnPaddingOnly(int start, int end, int maxRange, int padding)
+ {
+ if (end <= 0 || start > (maxRange - padding))
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+
+ bool ClampRange(int & start, int & end, int maxRange)
+ {
+ if (start < 0 || end > maxRange)
+ {
+ start = std::min(std::max(start, 0), maxRange);
+ end = std::min(std::max(end, 0), maxRange);
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+}
+
+namespace armnn
+{
+
+void Pooling2d(const float* in,
+ float* out,
+ const TensorInfo& inputInfo,
+ const TensorInfo& outputInfo,
+ const Pooling2dDescriptor& params)
+{
+ const int batchSize = boost::numeric_cast<int>(outputInfo.GetShape()[0]);
+ const int channels = boost::numeric_cast<int>(outputInfo.GetShape()[1]);
+ const int heightOutput = boost::numeric_cast<int>(outputInfo.GetShape()[2]);
+ const int widthOutput = boost::numeric_cast<int>(outputInfo.GetShape()[3]);
+ const int heightInput = boost::numeric_cast<int>(inputInfo.GetShape()[2]);
+ const int widthInput = boost::numeric_cast<int>(inputInfo.GetShape()[3]);
+ const int padLeft = boost::numeric_cast<int>(params.m_PadLeft);
+ const int padRight = boost::numeric_cast<int>(params.m_PadRight);
+ const int padTop = boost::numeric_cast<int>(params.m_PadTop);
+ const int padBottom = boost::numeric_cast<int>(params.m_PadBottom);
+ const int strideX = boost::numeric_cast<int>(params.m_StrideX);
+ const int strideY = boost::numeric_cast<int>(params.m_StrideY);
+ const int poolHeight = boost::numeric_cast<int>(params.m_PoolHeight);
+ const int poolWidth = boost::numeric_cast<int>(params.m_PoolWidth);
+
+ float defaultInitializer = DefaultInitializer(params.m_PoolType);
+
+ Accumulator accumulate = GetAccumulator(params.m_PoolType);
+ Executor execute = GetExecutor(params.m_PoolType);
+
+ // Check supported padding methods outside the loop to simplify
+ // the inner loop.
+ if (params.m_PaddingMethod != PaddingMethod::Exclude &&
+ params.m_PaddingMethod != PaddingMethod::IgnoreValue)
+ {
+ throw armnn::InvalidArgumentException("Unsupported padding type");
+ }
+
+ for (int n = 0; n < batchSize; n++)
+ {
+ for (int c = 0; c < channels; c++)
+ {
+ for (int yOutput = 0; yOutput < heightOutput; yOutput++)
+ {
+ for (int xOutput = 0; xOutput < widthOutput; xOutput++)
+ {
+ int hstart = (yOutput * strideY) - padTop;
+ int wstart = (xOutput * strideX) - padLeft;
+ int hend = hstart + poolHeight;
+ int wend = wstart + poolWidth;
+
+ // Clamp the pooling region inside the valid input area (which includes the padding).
+ // This is necessary because the final pooling in a row may overlap beyond the padding.
+ hend = std::min(hend, heightInput + padBottom);
+ wend = std::min(wend, widthInput + padRight);
+
+ float result = defaultInitializer;
+ float poolAreaSize = boost::numeric_cast<float>((hend - hstart) * (wend - wstart));
+
+ // Special case: when the pooling kernel is over a padding region and the padding
+ // size is larger or equal to the kernel and the kernel only covers
+ // padding and no real values, then we initialize the result as zero
+ // by convention. This is because we need to choose a value here and
+ // all values we have are padding, which we ignore.
+ if (OnPaddingOnly(hstart, hend, heightInput, padBottom) ||
+ OnPaddingOnly(wstart, wend, widthInput, padRight))
+ {
+ result = 0.0f;
+ }
+
+ bool clamped = ClampRange(wstart, wend, widthInput);
+ clamped |= ClampRange(hstart, hend, heightInput);
+
+ if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude)
+ {
+                    // When padding is excluded, the calculation effectively uses a smaller
+                    // kernel, so the divisor is recomputed here from the clamped region.
+ poolAreaSize = boost::numeric_cast<float>((hend - hstart) * (wend - wstart));
+ }
+
+ for (auto yInput = hstart; yInput < hend; yInput++)
+ {
+ for (auto xInput = wstart; xInput < wend; xInput++)
+ {
+ float inval = in[n * widthInput * heightInput * channels +
+ c * widthInput * heightInput +
+ yInput * widthInput +
+ xInput];
+
+ accumulate(result, inval);
+ }
+ }
+
+ execute(result, poolAreaSize);
+
+ out[n * widthOutput * heightOutput * channels +
+ c * widthOutput * heightOutput +
+ yOutput * widthOutput +
+ xOutput] = result;
+ }
+ }
+ }
+ }
+}
+
+} //namespace armnn
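
For average pooling the two supported padding methods differ only in the divisor: IgnoreValue keeps the full kernel area (padding contributes zeros), while Exclude divides by the clamped in-bounds area, which is what the recomputed poolAreaSize above achieves. A tiny worked example for a 2x2 kernel that covers a single real element of value 8 in its corner (the values are illustrative):

    #include <cassert>
    #include <cmath>

    int main()
    {
        // 2x2 kernel placed at the top-left corner of a padded input, covering
        // only one real element with value 8; the other three taps are padding.
        const float realSum = 8.0f;

        const float ignoreValueAvg = realSum / 4.0f;   // full kernel area, padding counted as zeros
        const float excludeAvg     = realSum / 1.0f;   // divisor shrunk to the clamped in-bounds area

        assert(std::fabs(ignoreValueAvg - 2.0f) < 1e-6f);
        assert(std::fabs(excludeAvg - 8.0f) < 1e-6f);
        return 0;
    }
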
diff --git a/src/backends/reference/workloads/Pooling2d.hpp b/src/backends/reference/workloads/Pooling2d.hpp
new file mode 100644
index 0000000000..da56b25c4e
--- /dev/null
+++ b/src/backends/reference/workloads/Pooling2d.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/Descriptors.hpp>
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+/// Computes the Pooling2d operation.
+void Pooling2d(const float* in,
+ float* out,
+ const TensorInfo& inputInfo,
+ const TensorInfo& outputInfo,
+ const Pooling2dDescriptor& params);
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefActivationFloat32Workload.cpp b/src/backends/reference/workloads/RefActivationFloat32Workload.cpp
new file mode 100644
index 0000000000..3cc59be7a4
--- /dev/null
+++ b/src/backends/reference/workloads/RefActivationFloat32Workload.cpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefActivationFloat32Workload.hpp"
+
+#include "Activation.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefActivationFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefActivationFloat32Workload_Execute");
+
+ Activation(GetInputTensorDataFloat(0, m_Data),
+ GetOutputTensorDataFloat(0, m_Data),
+ GetTensorInfo(m_Data.m_Inputs[0]),
+ m_Data.m_Parameters.m_Function,
+ m_Data.m_Parameters.m_A,
+ m_Data.m_Parameters.m_B);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefActivationFloat32Workload.hpp b/src/backends/reference/workloads/RefActivationFloat32Workload.hpp
new file mode 100644
index 0000000000..0de33f02ff
--- /dev/null
+++ b/src/backends/reference/workloads/RefActivationFloat32Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+
+namespace armnn
+{
+
+class RefActivationFloat32Workload : public Float32Workload<ActivationQueueDescriptor>
+{
+public:
+ using Float32Workload<ActivationQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefActivationUint8Workload.cpp b/src/backends/reference/workloads/RefActivationUint8Workload.cpp
new file mode 100644
index 0000000000..b95c2e22a8
--- /dev/null
+++ b/src/backends/reference/workloads/RefActivationUint8Workload.cpp
@@ -0,0 +1,38 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefActivationUint8Workload.hpp"
+
+#include "Activation.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <vector>
+
+namespace armnn
+{
+
+void RefActivationUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefActivationUint8Workload_Execute");
+
+ const TensorInfo& tensorInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+
+ auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), tensorInfo);
+
+ std::vector<float> results(tensorInfo.GetNumElements());
+
+ Activation(dequant.data(),
+ results.data(),
+ tensorInfo,
+ m_Data.m_Parameters.m_Function,
+ m_Data.m_Parameters.m_A,
+ m_Data.m_Parameters.m_B);
+
+ Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), GetTensorInfo(m_Data.m_Outputs[0]));
+}
+
+} //namespace armnn
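
The Uint8 workloads in this patch share a pattern: dequantize the uint8 input to float, run the float reference implementation, then quantize the result back. A minimal standalone sketch of that round trip using the usual asymmetric quantization formula real = scale * (quantized - offset); the helper names and the scale/offset values are illustrative and do not use the ArmNN RefWorkloadUtils helpers:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    // Asymmetric quantization: real = scale * (quantized - offset).
    float DequantizeValue(uint8_t q, float scale, int32_t offset)
    {
        return scale * (static_cast<int32_t>(q) - offset);
    }

    uint8_t QuantizeValue(float r, float scale, int32_t offset)
    {
        const int32_t q = static_cast<int32_t>(std::round(r / scale)) + offset;
        return static_cast<uint8_t>(std::min(255, std::max(0, q)));
    }

    int main()
    {
        const float scale = 0.5f;
        const int32_t offset = 10;

        const uint8_t in = 4;                              // represents 0.5 * (4 - 10) = -3.0
        const float x = DequantizeValue(in, scale, offset);
        const float y = std::max(0.0f, x);                 // the float reference op (ReLu here)
        const uint8_t out = QuantizeValue(y, scale, offset);

        std::printf("in=%u -> %.1f -> relu -> %.1f -> out=%u\n",
                    static_cast<unsigned>(in), x, y, static_cast<unsigned>(out));   // out == 10
        return 0;
    }
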
diff --git a/src/backends/reference/workloads/RefActivationUint8Workload.hpp b/src/backends/reference/workloads/RefActivationUint8Workload.hpp
new file mode 100644
index 0000000000..f38888a9f7
--- /dev/null
+++ b/src/backends/reference/workloads/RefActivationUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefActivationUint8Workload : public Uint8Workload<ActivationQueueDescriptor>
+{
+public:
+ using Uint8Workload<ActivationQueueDescriptor>::Uint8Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefArithmeticWorkload.cpp b/src/backends/reference/workloads/RefArithmeticWorkload.cpp
new file mode 100644
index 0000000000..6c39fa1186
--- /dev/null
+++ b/src/backends/reference/workloads/RefArithmeticWorkload.cpp
@@ -0,0 +1,69 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefArithmeticWorkload.hpp"
+#include "ArithmeticFunction.hpp"
+#include "RefWorkloadUtils.hpp"
+#include "Profiling.hpp"
+#include <vector>
+
+namespace armnn
+{
+
+template <typename ParentDescriptor, typename Functor>
+void BaseFloat32ArithmeticWorkload<ParentDescriptor, Functor>::ExecuteImpl(const char * debugString) const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, debugString);
+
+ auto data = Float32Workload<ParentDescriptor>::GetData();
+ const TensorShape& inShape0 = GetTensorInfo(data.m_Inputs[0]).GetShape();
+ const TensorShape& inShape1 = GetTensorInfo(data.m_Inputs[1]).GetShape();
+ const TensorShape& outShape = GetTensorInfo(data.m_Outputs[0]).GetShape();
+
+ const float* inData0 = GetInputTensorDataFloat(0, data);
+ const float* inData1 = GetInputTensorDataFloat(1, data);
+ float* outData = GetOutputTensorDataFloat(0, data);
+
+ ArithmeticFunction<Functor>(inShape0, inShape1, outShape, inData0, inData1, outData);
+}
+
+template <typename ParentDescriptor, typename Functor>
+void BaseUint8ArithmeticWorkload<ParentDescriptor, Functor>::ExecuteImpl(const char * debugString) const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, debugString);
+
+ auto data = Uint8Workload<ParentDescriptor>::GetData();
+ const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]);
+ const TensorInfo& inputInfo1 = GetTensorInfo(data.m_Inputs[1]);
+ const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]);
+
+ auto dequant0 = Dequantize(GetInputTensorDataU8(0, data), inputInfo0);
+ auto dequant1 = Dequantize(GetInputTensorDataU8(1, data), inputInfo1);
+
+ std::vector<float> results(outputInfo.GetNumElements());
+
+ ArithmeticFunction<Functor>(inputInfo0.GetShape(),
+ inputInfo1.GetShape(),
+ outputInfo.GetShape(),
+ dequant0.data(),
+ dequant1.data(),
+ results.data());
+
+ Quantize(GetOutputTensorDataU8(0, data), results.data(), outputInfo);
+}
+
+}
+
+template class armnn::BaseFloat32ArithmeticWorkload<armnn::AdditionQueueDescriptor, std::plus<float>>;
+template class armnn::BaseUint8ArithmeticWorkload<armnn::AdditionQueueDescriptor, std::plus<float>>;
+
+template class armnn::BaseFloat32ArithmeticWorkload<armnn::SubtractionQueueDescriptor, std::minus<float>>;
+template class armnn::BaseUint8ArithmeticWorkload<armnn::SubtractionQueueDescriptor, std::minus<float>>;
+
+template class armnn::BaseFloat32ArithmeticWorkload<armnn::MultiplicationQueueDescriptor, std::multiplies<float>>;
+template class armnn::BaseUint8ArithmeticWorkload<armnn::MultiplicationQueueDescriptor, std::multiplies<float>>;
+
+template class armnn::BaseFloat32ArithmeticWorkload<armnn::DivisionQueueDescriptor, std::divides<float>>;
+template class armnn::BaseUint8ArithmeticWorkload<armnn::DivisionQueueDescriptor, std::divides<float>>;
diff --git a/src/backends/reference/workloads/RefArithmeticWorkload.hpp b/src/backends/reference/workloads/RefArithmeticWorkload.hpp
new file mode 100644
index 0000000000..7197b7a883
--- /dev/null
+++ b/src/backends/reference/workloads/RefArithmeticWorkload.hpp
@@ -0,0 +1,122 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/Types.hpp>
+#include "backends/StringMapping.hpp"
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+template <typename Functor,
+ typename armnn::DataType DataType,
+ typename ParentDescriptor,
+ typename armnn::StringMapping::Id DebugString>
+class RefArithmeticWorkload
+{
+ // Needs specialization. The default is empty on purpose.
+};
+
+template <typename ParentDescriptor, typename Functor>
+class BaseFloat32ArithmeticWorkload : public Float32Workload<ParentDescriptor>
+{
+public:
+ using Float32Workload<ParentDescriptor>::Float32Workload;
+ void ExecuteImpl(const char * debugString) const;
+};
+
+template <typename Functor,
+ typename ParentDescriptor,
+ typename armnn::StringMapping::Id DebugString>
+class RefArithmeticWorkload<Functor, armnn::DataType::Float32, ParentDescriptor, DebugString>
+ : public BaseFloat32ArithmeticWorkload<ParentDescriptor, Functor>
+{
+public:
+ using BaseFloat32ArithmeticWorkload<ParentDescriptor, Functor>::BaseFloat32ArithmeticWorkload;
+
+ virtual void Execute() const override
+ {
+ using Parent = BaseFloat32ArithmeticWorkload<ParentDescriptor, Functor>;
+ Parent::ExecuteImpl(StringMapping::Instance().Get(DebugString));
+ }
+};
+
+template <typename ParentDescriptor, typename Functor>
+class BaseUint8ArithmeticWorkload : public Uint8Workload<ParentDescriptor>
+{
+public:
+ using Uint8Workload<ParentDescriptor>::Uint8Workload;
+ void ExecuteImpl(const char * debugString) const;
+};
+
+template <typename Functor,
+ typename ParentDescriptor,
+ typename armnn::StringMapping::Id DebugString>
+class RefArithmeticWorkload<Functor, armnn::DataType::QuantisedAsymm8, ParentDescriptor, DebugString>
+ : public BaseUint8ArithmeticWorkload<ParentDescriptor, Functor>
+{
+public:
+ using BaseUint8ArithmeticWorkload<ParentDescriptor, Functor>::BaseUint8ArithmeticWorkload;
+
+ virtual void Execute() const override
+ {
+ using Parent = BaseUint8ArithmeticWorkload<ParentDescriptor, Functor>;
+ Parent::ExecuteImpl(StringMapping::Instance().Get(DebugString));
+ }
+};
+
+using RefAdditionFloat32Workload =
+ RefArithmeticWorkload<std::plus<float>,
+ DataType::Float32,
+ AdditionQueueDescriptor,
+ StringMapping::RefAdditionWorkload_Execute>;
+
+using RefAdditionUint8Workload =
+ RefArithmeticWorkload<std::plus<float>,
+ DataType::QuantisedAsymm8,
+ AdditionQueueDescriptor,
+ StringMapping::RefAdditionWorkload_Execute>;
+
+
+using RefSubtractionFloat32Workload =
+ RefArithmeticWorkload<std::minus<float>,
+ DataType::Float32,
+ SubtractionQueueDescriptor,
+ StringMapping::RefSubtractionWorkload_Execute>;
+
+using RefSubtractionUint8Workload =
+ RefArithmeticWorkload<std::minus<float>,
+ DataType::QuantisedAsymm8,
+ SubtractionQueueDescriptor,
+ StringMapping::RefSubtractionWorkload_Execute>;
+
+using RefMultiplicationFloat32Workload =
+ RefArithmeticWorkload<std::multiplies<float>,
+ DataType::Float32,
+ MultiplicationQueueDescriptor,
+ StringMapping::RefMultiplicationWorkload_Execute>;
+
+using RefMultiplicationUint8Workload =
+ RefArithmeticWorkload<std::multiplies<float>,
+ DataType::QuantisedAsymm8,
+ MultiplicationQueueDescriptor,
+ StringMapping::RefMultiplicationWorkload_Execute>;
+
+using RefDivisionFloat32Workload =
+ RefArithmeticWorkload<std::divides<float>,
+ DataType::Float32,
+ DivisionQueueDescriptor,
+ StringMapping::RefDivisionWorkload_Execute>;
+
+using RefDivisionUint8Workload =
+ RefArithmeticWorkload<std::divides<float>,
+ DataType::QuantisedAsymm8,
+ DivisionQueueDescriptor,
+ StringMapping::RefDivisionWorkload_Execute>;
+
+} // armnn
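
RefArithmeticWorkload above uses an intentionally empty primary template with per-DataType specializations, so only the supported (Functor, DataType) combinations provide an Execute() method. A reduced standalone sketch of the same pattern (the enum and class names are illustrative, not the ArmNN types):

    #include <cstdio>

    enum class DataType { Float32, QuantisedAsymm8 };

    // Empty primary template: unsupported data types provide no Execute() and
    // therefore fail to compile at the point of use.
    template <DataType DT>
    struct ExampleWorkload
    {
    };

    template <>
    struct ExampleWorkload<DataType::Float32>
    {
        void Execute() const { std::printf("float path\n"); }
    };

    template <>
    struct ExampleWorkload<DataType::QuantisedAsymm8>
    {
        void Execute() const { std::printf("uint8 path\n"); }
    };

    int main()
    {
        ExampleWorkload<DataType::Float32>().Execute();
        ExampleWorkload<DataType::QuantisedAsymm8>().Execute();
        return 0;
    }
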
diff --git a/src/backends/reference/workloads/RefBaseConstantWorkload.cpp b/src/backends/reference/workloads/RefBaseConstantWorkload.cpp
new file mode 100644
index 0000000000..647677b4fb
--- /dev/null
+++ b/src/backends/reference/workloads/RefBaseConstantWorkload.cpp
@@ -0,0 +1,49 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefBaseConstantWorkload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+
+#include <armnn/Types.hpp>
+
+#include <boost/assert.hpp>
+
+#include <cstring>
+
+namespace armnn
+{
+
+template <armnn::DataType DataType>
+void RefBaseConstantWorkload<DataType>::Execute() const
+{
+ // Considering the reference backend independently, it could be possible to initialise the intermediate tensor
+ // created by the layer output handler at workload construction time, rather than at workload execution time.
+ // However, this is not an option for other backends (e.g. CL). For consistency, we prefer to align all
+ // implementations.
+ // A similar argument can be made about performing the memory copy in the first place (the layer output handler
+ // could have a non-owning reference to the layer output tensor managed by the const input layer); again, this is
+ // not an option for other backends, and the extra complexity required to make this work for the reference backend
+ // may not be worth the effort (skipping a memory copy in the first inference).
+ if (!m_RanOnce)
+ {
+ const ConstantQueueDescriptor& data = this->m_Data;
+
+ BOOST_ASSERT(data.m_LayerOutput != nullptr);
+
+ const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]);
+ BOOST_ASSERT(data.m_LayerOutput->GetTensorInfo().GetNumBytes() == outputInfo.GetNumBytes());
+
+ memcpy(GetOutputTensorData<void>(0, data), data.m_LayerOutput->GetConstTensor<void>(),
+ outputInfo.GetNumBytes());
+
+ m_RanOnce = true;
+ }
+}
+
+template class RefBaseConstantWorkload<DataType::Float32>;
+template class RefBaseConstantWorkload<DataType::QuantisedAsymm8>;
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefBaseConstantWorkload.hpp b/src/backends/reference/workloads/RefBaseConstantWorkload.hpp
new file mode 100644
index 0000000000..8dc9fd6104
--- /dev/null
+++ b/src/backends/reference/workloads/RefBaseConstantWorkload.hpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+#include <armnn/Types.hpp>
+
+namespace armnn
+{
+
+// Base class template providing an implementation of the Constant layer common to all data types.
+template <armnn::DataType DataType>
+class RefBaseConstantWorkload : public TypedWorkload<ConstantQueueDescriptor, DataType>
+{
+public:
+ RefBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : TypedWorkload<ConstantQueueDescriptor, DataType>(descriptor, info)
+ , m_RanOnce(false)
+ {
+ }
+
+ virtual void Execute() const override;
+
+private:
+ mutable bool m_RanOnce;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.cpp b/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.cpp
new file mode 100644
index 0000000000..313af9c438
--- /dev/null
+++ b/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.cpp
@@ -0,0 +1,38 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefBatchNormalizationFloat32Workload.hpp"
+
+#include "BatchNormImpl.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+RefBatchNormalizationFloat32Workload::RefBatchNormalizationFloat32Workload(
+ const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Float32Workload<BatchNormalizationQueueDescriptor>(descriptor, info),
+ m_Mean(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Mean))),
+ m_Variance(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Variance))),
+ m_Beta(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Beta))),
+ m_Gamma(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Gamma))) {}
+
+void RefBatchNormalizationFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationFloat32Workload_Execute");
+
+ const float* var = m_Variance->GetConstTensor<float>();
+ const float* mean = m_Mean->GetConstTensor<float>();
+ const float* gamma = m_Gamma->GetConstTensor<float>();
+ const float* beta = m_Beta->GetConstTensor<float>();
+
+ auto inputData = GetInputTensorDataFloat(0, m_Data);
+ auto outputData = GetOutputTensorDataFloat(0, m_Data);
+
+ BatchNormImpl(m_Data, var, mean, gamma, beta, outputData, inputData);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.hpp b/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.hpp
new file mode 100644
index 0000000000..15c843c2ca
--- /dev/null
+++ b/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefBatchNormalizationFloat32Workload : public Float32Workload<BatchNormalizationQueueDescriptor>
+{
+public:
+ explicit RefBatchNormalizationFloat32Workload(const BatchNormalizationQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ std::unique_ptr<ScopedCpuTensorHandle> m_Mean;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Variance;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Beta;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Gamma;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefBatchNormalizationUint8Workload.cpp b/src/backends/reference/workloads/RefBatchNormalizationUint8Workload.cpp
new file mode 100644
index 0000000000..e248ad4b9d
--- /dev/null
+++ b/src/backends/reference/workloads/RefBatchNormalizationUint8Workload.cpp
@@ -0,0 +1,47 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefBatchNormalizationUint8Workload.hpp"
+
+#include "BatchNormImpl.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <vector>
+
+namespace armnn
+{
+RefBatchNormalizationUint8Workload::RefBatchNormalizationUint8Workload(
+ const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Uint8Workload<BatchNormalizationQueueDescriptor>(descriptor, info),
+ m_Mean(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Mean))),
+ m_Variance(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Variance))),
+ m_Beta(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Beta))),
+ m_Gamma(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Gamma))) {}
+
+void RefBatchNormalizationUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationUint8Workload_Execute");
+
+ const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& varInfo = GetTensorInfo(m_Variance.get());
+ const TensorInfo& meanInfo = GetTensorInfo(m_Mean.get());
+ const TensorInfo& gammaInfo = GetTensorInfo(m_Gamma.get());
+ const TensorInfo& betaInfo = GetTensorInfo(m_Beta.get());
+ const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+ auto input = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo0);
+ auto var = Dequantize(m_Variance->GetConstTensor<uint8_t>(), varInfo);
+ auto mean = Dequantize(m_Mean->GetConstTensor<uint8_t>(), meanInfo);
+ auto gamma = Dequantize(m_Gamma->GetConstTensor<uint8_t>(), gammaInfo);
+ auto beta = Dequantize(m_Beta->GetConstTensor<uint8_t>(), betaInfo);
+
+ std::vector<float> results(outputInfo.GetNumElements());
+ BatchNormImpl(m_Data, var.data(), mean.data(), gamma.data(), beta.data(), results.data(), input.data());
+ Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo);
+}
+
+} //namespace armnn
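
The Uint8 workload above follows the backend's recurring pattern: dequantize the 8-bit operands to float, reuse the float implementation (BatchNormImpl), then requantize the result. Below is a minimal standalone sketch of the two conversions, assuming Arm NN's asymmetric scheme realValue = scale * (quantizedValue - offset); DequantizeBuffer and QuantizeBuffer are illustrative names, not the library's own helpers.

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Sketch of the dequantize -> float compute -> requantize pattern used by the Uint8
    // reference workloads, with asymmetric 8-bit parameters (scale, offset).
    std::vector<float> DequantizeBuffer(const uint8_t* data, size_t count, float scale, int32_t offset)
    {
        std::vector<float> result(count);
        for (size_t i = 0; i < count; ++i)
        {
            result[i] = scale * (static_cast<int32_t>(data[i]) - offset);
        }
        return result;
    }

    void QuantizeBuffer(uint8_t* out, const float* in, size_t count, float scale, int32_t offset)
    {
        for (size_t i = 0; i < count; ++i)
        {
            const int32_t q = static_cast<int32_t>(std::round(in[i] / scale)) + offset;
            out[i] = static_cast<uint8_t>(std::max(0, std::min(255, q))); // clamp to the uint8 range
        }
    }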
diff --git a/src/backends/reference/workloads/RefBatchNormalizationUint8Workload.hpp b/src/backends/reference/workloads/RefBatchNormalizationUint8Workload.hpp
new file mode 100644
index 0000000000..d3e8e0a120
--- /dev/null
+++ b/src/backends/reference/workloads/RefBatchNormalizationUint8Workload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefBatchNormalizationUint8Workload : public Uint8Workload<BatchNormalizationQueueDescriptor>
+{
+public:
+ explicit RefBatchNormalizationUint8Workload(const BatchNormalizationQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ std::unique_ptr<ScopedCpuTensorHandle> m_Mean;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Variance;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Beta;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Gamma;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConstantFloat32Workload.cpp b/src/backends/reference/workloads/RefConstantFloat32Workload.cpp
new file mode 100644
index 0000000000..074e8ccaae
--- /dev/null
+++ b/src/backends/reference/workloads/RefConstantFloat32Workload.cpp
@@ -0,0 +1,19 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefConstantFloat32Workload.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefConstantFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConstantFloat32Workload_Execute");
+ RefBaseConstantWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConstantFloat32Workload.hpp b/src/backends/reference/workloads/RefConstantFloat32Workload.hpp
new file mode 100644
index 0000000000..76e3a42026
--- /dev/null
+++ b/src/backends/reference/workloads/RefConstantFloat32Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "RefBaseConstantWorkload.hpp"
+
+namespace armnn
+{
+
+class RefConstantFloat32Workload : public RefBaseConstantWorkload<DataType::Float32>
+{
+public:
+ using RefBaseConstantWorkload<DataType::Float32>::RefBaseConstantWorkload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConstantUint8Workload.cpp b/src/backends/reference/workloads/RefConstantUint8Workload.cpp
new file mode 100644
index 0000000000..07e4719d54
--- /dev/null
+++ b/src/backends/reference/workloads/RefConstantUint8Workload.cpp
@@ -0,0 +1,19 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefConstantUint8Workload.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefConstantUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConstantUint8Workload_Execute");
+ RefBaseConstantWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConstantUint8Workload.hpp b/src/backends/reference/workloads/RefConstantUint8Workload.hpp
new file mode 100644
index 0000000000..02552ac80b
--- /dev/null
+++ b/src/backends/reference/workloads/RefConstantUint8Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "RefBaseConstantWorkload.hpp"
+
+namespace armnn
+{
+
+class RefConstantUint8Workload : public RefBaseConstantWorkload<DataType::QuantisedAsymm8>
+{
+public:
+ using RefBaseConstantWorkload<DataType::QuantisedAsymm8>::RefBaseConstantWorkload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp
new file mode 100644
index 0000000000..e148bf6a9d
--- /dev/null
+++ b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp
@@ -0,0 +1,25 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefConvertFp16ToFp32Workload.hpp"
+#include "Half.hpp"
+#include "RefWorkloadUtils.hpp"
+#include "FloatingPointConverter.hpp"
+
+namespace armnn
+{
+
+void RefConvertFp16ToFp32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp16ToFp32Workload_Execute");
+
+ const Half* const input = GetInputTensorDataHalf(0, m_Data);
+ float* const output = GetOutputTensorDataFloat(0, m_Data);
+
+ unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements();
+ armnnUtils::FloatingPointConverter::ConvertFloat16To32(input, numElements, output);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp
new file mode 100644
index 0000000000..5e841ba34f
--- /dev/null
+++ b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefConvertFp16ToFp32Workload : public Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>
+{
+public:
+ using Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>::Float16ToFloat32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp
new file mode 100644
index 0000000000..efaaf8e1ad
--- /dev/null
+++ b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefConvertFp32ToFp16Workload.hpp"
+
+#include "Half.hpp"
+#include "FloatingPointConverter.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefConvertFp32ToFp16Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp32ToFp16Workload_Execute");
+
+ const float* const input = GetInputTensorDataFloat(0, m_Data);
+ Half* const output = GetOutputTensorDataHalf(0, m_Data);
+
+ // convert Fp32 input to Fp16 output
+ unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements();
+ armnnUtils::FloatingPointConverter::ConvertFloat32To16(input, numElements, output);
+}
+
+} //namespace armnn
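
Both conversion workloads delegate to armnnUtils::FloatingPointConverter with a raw pointer, an element count, and a destination buffer. The following is a hedged usage sketch of the same calls outside a workload, assuming the Arm NN utility headers used above are on the include path; values outside the Fp16 range saturate and small values lose precision on the round trip.

    #include <vector>
    #include "Half.hpp"
    #include "FloatingPointConverter.hpp"

    int main()
    {
        std::vector<float> source = { 0.1f, 1.5f, -3.25f, 65504.0f };
        std::vector<armnn::Half> half(source.size());
        std::vector<float> restored(source.size());

        // Fp32 -> Fp16, mirroring RefConvertFp32ToFp16Workload::Execute().
        armnnUtils::FloatingPointConverter::ConvertFloat32To16(source.data(), source.size(), half.data());
        // Fp16 -> Fp32, mirroring RefConvertFp16ToFp32Workload::Execute().
        armnnUtils::FloatingPointConverter::ConvertFloat16To32(half.data(), restored.size(), restored.data());
        return 0;
    }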
diff --git a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp
new file mode 100644
index 0000000000..0754fd5c79
--- /dev/null
+++ b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefConvertFp32ToFp16Workload : public Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>
+{
+public:
+ using Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>::Float32ToFloat16Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp
new file mode 100644
index 0000000000..20905646d7
--- /dev/null
+++ b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp
@@ -0,0 +1,37 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefConvolution2dFloat32Workload.hpp"
+
+#include "ConvImpl.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+RefConvolution2dFloat32Workload::RefConvolution2dFloat32Workload(
+ const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Float32Workload<Convolution2dQueueDescriptor>(descriptor, info),
+ m_Weight(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight))),
+ m_Bias(descriptor.m_Parameters.m_BiasEnabled
+ ? std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias)) : nullptr) {}
+
+void RefConvolution2dFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dFloat32Workload_Execute");
+
+ float* outputData = GetOutputTensorDataFloat(0, m_Data);
+ const float* inputData = GetInputTensorDataFloat(0, m_Data);
+ const float* weightData = m_Weight->template GetConstTensor<float>();
+ const float* biasData = m_Data.m_Parameters.m_BiasEnabled ?
+ m_Bias->template GetConstTensor<float>() : nullptr;
+ const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
+
+ ConvImpl<armnn::Convolution2dQueueDescriptor, float, float, float>(
+ m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConvolution2dFloat32Workload.hpp b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.hpp
new file mode 100644
index 0000000000..34489e807c
--- /dev/null
+++ b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefConvolution2dFloat32Workload : public Float32Workload<Convolution2dQueueDescriptor>
+{
+public:
+ explicit RefConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ std::unique_ptr<ScopedCpuTensorHandle> m_Weight;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Bias;
+
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp b/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp
new file mode 100644
index 0000000000..881e9bf6b0
--- /dev/null
+++ b/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp
@@ -0,0 +1,45 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefConvolution2dUint8Workload.hpp"
+
+#include "ConvImpl.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+RefConvolution2dUint8Workload::RefConvolution2dUint8Workload(
+ const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Uint8Workload<Convolution2dQueueDescriptor>(descriptor, info),
+ m_Weight(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight))),
+ m_Bias(descriptor.m_Parameters.m_BiasEnabled
+ ? std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias)) : nullptr) {}
+
+void RefConvolution2dUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dUint8Workload_Execute");
+
+ const uint8_t* inputData = GetInputTensorDataU8(0, m_Data);
+ const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const uint8_t* weightsData = m_Weight->template GetConstTensor<uint8_t>();
+ const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get());
+ const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ?
+ m_Bias->template GetConstTensor<int32_t>() :
+ nullptr;
+ uint8_t* outputData = GetOutputTensorDataU8(0, m_Data);
+ const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
+
+ ConvImpl<armnn::Convolution2dQueueDescriptor, uint8_t, int32_t, int32_t>(
+ m_Data,
+ inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(),
+ weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(),
+ biasData,
+ outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo);
+}
+
+} //namespace armnn
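
Unlike the float path, the Uint8 convolution forwards a scale and zero-point offset for the input, weights, and output so that ConvImpl can map its integer accumulator back to the output's quantization. Here is a sketch of the usual requantization step under that scheme; RequantizeAccumulator illustrates the arithmetic only and is not the code inside ConvImpl.

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // The int32 accumulator holds sum((x_q - x_offset) * (w_q - w_offset)) plus the
    // (already int32) bias; the combined scale inputScale * weightScale converts it to a
    // real value, which is then mapped into the output's uint8 range.
    uint8_t RequantizeAccumulator(int32_t accumulator,
                                  float inputScale, float weightScale,
                                  float outputScale, int32_t outputOffset)
    {
        const float realValue = static_cast<float>(accumulator) * inputScale * weightScale;
        const int32_t quantized = static_cast<int32_t>(std::round(realValue / outputScale)) + outputOffset;
        return static_cast<uint8_t>(std::max(0, std::min(255, quantized)));
    }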
diff --git a/src/backends/reference/workloads/RefConvolution2dUint8Workload.hpp b/src/backends/reference/workloads/RefConvolution2dUint8Workload.hpp
new file mode 100644
index 0000000000..0e2dd6aada
--- /dev/null
+++ b/src/backends/reference/workloads/RefConvolution2dUint8Workload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefConvolution2dUint8Workload : public Uint8Workload<Convolution2dQueueDescriptor>
+{
+public:
+ explicit RefConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+
+ virtual void Execute() const override;
+
+private:
+ std::unique_ptr<ScopedCpuTensorHandle> m_Weight;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Bias;
+
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp
new file mode 100644
index 0000000000..e89013b9bd
--- /dev/null
+++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp
@@ -0,0 +1,37 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefDepthwiseConvolution2dFloat32Workload.hpp"
+
+#include "ConvImpl.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+RefDepthwiseConvolution2dFloat32Workload::RefDepthwiseConvolution2dFloat32Workload(
+ const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Float32Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info),
+ m_Weight(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight))),
+ m_Bias(descriptor.m_Parameters.m_BiasEnabled
+ ? std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias)) : nullptr) {}
+
+void RefDepthwiseConvolution2dFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dFloat32Workload_Execute");
+
+ float* outputData = GetOutputTensorDataFloat(0, m_Data);
+ const float* inputData = GetInputTensorDataFloat(0, m_Data);
+ const float* weightData = m_Weight->template GetConstTensor<float>();
+ const float* biasData = m_Data.m_Parameters.m_BiasEnabled ?
+ m_Bias->template GetConstTensor<float>() : nullptr;
+ const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
+
+ ConvImpl<armnn::DepthwiseConvolution2dQueueDescriptor, float, float, float>
+ (m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo, true);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.hpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.hpp
new file mode 100644
index 0000000000..8f1227e2de
--- /dev/null
+++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefDepthwiseConvolution2dFloat32Workload : public Float32Workload<DepthwiseConvolution2dQueueDescriptor>
+{
+public:
+ explicit RefDepthwiseConvolution2dFloat32Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+
+ virtual void Execute() const override;
+
+private:
+ std::unique_ptr<ScopedCpuTensorHandle> m_Weight;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Bias;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp
new file mode 100644
index 0000000000..e8e501d6ae
--- /dev/null
+++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp
@@ -0,0 +1,46 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefDepthwiseConvolution2dUint8Workload.hpp"
+
+#include "ConvImpl.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+RefDepthwiseConvolution2dUint8Workload::RefDepthwiseConvolution2dUint8Workload(
+ const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Uint8Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info),
+ m_Weight(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight))),
+ m_Bias(descriptor.m_Parameters.m_BiasEnabled
+ ? std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias)) : nullptr) {}
+
+void RefDepthwiseConvolution2dUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dUint8Workload_Execute");
+
+ const uint8_t* inputData = GetInputTensorDataU8(0, m_Data);
+ const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const uint8_t* weightsData = m_Weight->template GetConstTensor<uint8_t>();
+ const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get());
+ const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ?
+ m_Bias->template GetConstTensor<int32_t>() :
+ nullptr;
+ uint8_t* outputData = GetOutputTensorDataU8(0, m_Data);
+ const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
+
+ ConvImpl<armnn::DepthwiseConvolution2dQueueDescriptor, uint8_t, int32_t, int32_t>(
+ m_Data,
+ inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(),
+ weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(),
+ biasData,
+ outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo, true);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.hpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.hpp
new file mode 100644
index 0000000000..c615cf7880
--- /dev/null
+++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefDepthwiseConvolution2dUint8Workload : public Uint8Workload<DepthwiseConvolution2dQueueDescriptor>
+{
+public:
+ explicit RefDepthwiseConvolution2dUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ std::unique_ptr<ScopedCpuTensorHandle> m_Weight;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Bias;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.cpp b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.cpp
new file mode 100644
index 0000000000..3e16f60b11
--- /dev/null
+++ b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.cpp
@@ -0,0 +1,42 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefFakeQuantizationFloat32Workload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <boost/numeric/conversion/cast.hpp>
+
+namespace armnn
+{
+
+void FakeQuantization(const float* inputData, float* outputData, uint32_t numElements, float min, float max)
+{
+ float scale = (max - min) / 255.f;
+ int32_t offset = boost::numeric_cast<int32_t>((-min * 255.f) / (max - min));
+
+ for (uint32_t i = 0; i < numElements; i++)
+ {
+ outputData[i] = static_cast<float>(armnn::Quantize<uint8_t>(inputData[i], scale, offset));
+ }
+
+}
+
+void RefFakeQuantizationFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFakeQuantizationFloat32Workload_Execute");
+
+ const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+
+ const float* inputData = GetInputTensorDataFloat(0, m_Data);
+ float* outputData = GetOutputTensorDataFloat(0, m_Data);
+ FakeQuantization(inputData, outputData, inputInfo.GetNumElements(),
+ m_Data.m_Parameters.m_Min,
+ m_Data.m_Parameters.m_Max);
+}
+
+} //namespace armnn
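
FakeQuantization derives an 8-bit scale and offset from the [min, max] range and snaps every float onto one of the 256 representable levels. A small worked example for [min, max] = [-1, 1], which yields scale ≈ 0.00784 and offset 127; static_cast is used for brevity where the workload uses boost::numeric_cast.

    #include <cstdio>

    int main()
    {
        const float minVal = -1.0f;
        const float maxVal = 1.0f;
        const float scale = (maxVal - minVal) / 255.f;                               // ~0.007843
        const int offset = static_cast<int>((-minVal * 255.f) / (maxVal - minVal));  // 127
        std::printf("scale=%f offset=%d\n", scale, offset);
        return 0;
    }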
diff --git a/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp
new file mode 100644
index 0000000000..523fdcff50
--- /dev/null
+++ b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefFakeQuantizationFloat32Workload : public Float32Workload<FakeQuantizationQueueDescriptor>
+{
+public:
+ using Float32Workload<FakeQuantizationQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefFloorFloat32Workload.cpp b/src/backends/reference/workloads/RefFloorFloat32Workload.cpp
new file mode 100644
index 0000000000..cc1f8800dc
--- /dev/null
+++ b/src/backends/reference/workloads/RefFloorFloat32Workload.cpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefFloorFloat32Workload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefFloorFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFloorFloat32Workload_Execute");
+
+ const float* const input = GetInputTensorDataFloat(0, m_Data);
+ float* const output = GetOutputTensorDataFloat(0, m_Data);
+
+ unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements();
+ for (unsigned int i = 0; i < numElements; ++i)
+ {
+ output[i] = floorf(input[i]);
+ }
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefFloorFloat32Workload.hpp b/src/backends/reference/workloads/RefFloorFloat32Workload.hpp
new file mode 100644
index 0000000000..d7cfa50365
--- /dev/null
+++ b/src/backends/reference/workloads/RefFloorFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefFloorFloat32Workload : public Float32Workload<FloorQueueDescriptor>
+{
+public:
+ using Float32Workload<FloorQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefFullyConnectedFloat32Workload.cpp b/src/backends/reference/workloads/RefFullyConnectedFloat32Workload.cpp
new file mode 100644
index 0000000000..ccaf4cd87b
--- /dev/null
+++ b/src/backends/reference/workloads/RefFullyConnectedFloat32Workload.cpp
@@ -0,0 +1,43 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefFullyConnectedFloat32Workload.hpp"
+
+#include "FullyConnected.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+RefFullyConnectedFloat32Workload::RefFullyConnectedFloat32Workload(
+ const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Float32Workload<FullyConnectedQueueDescriptor>(descriptor, info),
+ m_Weight(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight))),
+ m_Bias(descriptor.m_Parameters.m_BiasEnabled
+ ? std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias)) : nullptr) {}
+
+void RefFullyConnectedFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFullyConnectedFloat32Workload_Execute");
+
+ const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+ float* outputData = GetOutputTensorDataFloat(0, m_Data);
+ const float* inputData = GetInputTensorDataFloat(0, m_Data);
+ const float* weightData = m_Weight->GetConstTensor<float>();
+ const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->GetConstTensor<float>() : nullptr;
+
+ FullyConnected(inputData,
+ outputData,
+ inputInfo,
+ outputInfo,
+ weightData,
+ biasData,
+ m_Data.m_Parameters.m_TransposeWeightMatrix);
+}
+
+} //namespace armnn
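
FullyConnected reduces to a dense matrix-vector product per batch entry plus an optional bias. Below is a naive standalone sketch of that computation; which weight layout corresponds to m_TransposeWeightMatrix == true is an assumption here, so treat the transposeWeights convention as illustrative.

    #include <cstddef>

    // Naive dense layer: output[b][o] = dot(input[b], weights row o) + bias[o].
    // transposeWeights selects between [outputSize][inputSize] (true, assumed here) and
    // [inputSize][outputSize] (false) storage of the weight matrix.
    void NaiveFullyConnected(const float* input, float* output,
                             size_t batchSize, size_t inputSize, size_t outputSize,
                             const float* weights, const float* bias, bool transposeWeights)
    {
        for (size_t b = 0; b < batchSize; ++b)
        {
            for (size_t o = 0; o < outputSize; ++o)
            {
                float sum = (bias != nullptr) ? bias[o] : 0.0f;
                for (size_t i = 0; i < inputSize; ++i)
                {
                    const float w = transposeWeights ? weights[o * inputSize + i]
                                                     : weights[i * outputSize + o];
                    sum += input[b * inputSize + i] * w;
                }
                output[b * outputSize + o] = sum;
            }
        }
    }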
diff --git a/src/backends/reference/workloads/RefFullyConnectedFloat32Workload.hpp b/src/backends/reference/workloads/RefFullyConnectedFloat32Workload.hpp
new file mode 100644
index 0000000000..ce058690ac
--- /dev/null
+++ b/src/backends/reference/workloads/RefFullyConnectedFloat32Workload.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefFullyConnectedFloat32Workload : public Float32Workload<FullyConnectedQueueDescriptor>
+{
+public:
+ explicit RefFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ std::unique_ptr<ScopedCpuTensorHandle> m_Weight;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Bias;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefFullyConnectedUint8Workload.cpp b/src/backends/reference/workloads/RefFullyConnectedUint8Workload.cpp
new file mode 100644
index 0000000000..cd785d786c
--- /dev/null
+++ b/src/backends/reference/workloads/RefFullyConnectedUint8Workload.cpp
@@ -0,0 +1,66 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefFullyConnectedUint8Workload.hpp"
+
+#include "FullyConnected.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <vector>
+
+namespace armnn
+{
+RefFullyConnectedUint8Workload::RefFullyConnectedUint8Workload(
+ const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Uint8Workload<FullyConnectedQueueDescriptor>(descriptor, info),
+ m_Weight(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight))),
+ m_Bias(descriptor.m_Parameters.m_BiasEnabled
+ ? std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias)) : nullptr) {}
+
+void RefFullyConnectedUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFullyConnectedUint8Workload_Execute");
+
+ const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+ const uint8_t* weightData = m_Weight->GetConstTensor<uint8_t>();
+
+ auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo);
+
+ auto weight = Dequantize(weightData, m_Weight->GetTensorInfo());
+
+ std::vector<float> results(outputInfo.GetNumElements());
+
+ if (m_Data.m_Parameters.m_BiasEnabled)
+ {
+ const int32_t* biasData = m_Bias->GetConstTensor<int32_t>();
+ auto bias = Dequantize(biasData, m_Bias->GetTensorInfo());
+
+ FullyConnected(dequant.data(),
+ results.data(),
+ inputInfo,
+ outputInfo,
+ weight.data(),
+ bias.data(),
+ m_Data.m_Parameters.m_TransposeWeightMatrix);
+ }
+ else
+ {
+ FullyConnected(dequant.data(),
+ results.data(),
+ inputInfo,
+ outputInfo,
+ weight.data(),
+ nullptr,
+ m_Data.m_Parameters.m_TransposeWeightMatrix);
+ }
+
+ Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefFullyConnectedUint8Workload.hpp b/src/backends/reference/workloads/RefFullyConnectedUint8Workload.hpp
new file mode 100644
index 0000000000..e489cc7d81
--- /dev/null
+++ b/src/backends/reference/workloads/RefFullyConnectedUint8Workload.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefFullyConnectedUint8Workload : public Uint8Workload<FullyConnectedQueueDescriptor>
+{
+public:
+ explicit RefFullyConnectedUint8Workload(const FullyConnectedQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ std::unique_ptr<ScopedCpuTensorHandle> m_Weight;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Bias;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefL2NormalizationFloat32Workload.cpp b/src/backends/reference/workloads/RefL2NormalizationFloat32Workload.cpp
new file mode 100644
index 0000000000..973c87b009
--- /dev/null
+++ b/src/backends/reference/workloads/RefL2NormalizationFloat32Workload.cpp
@@ -0,0 +1,61 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefL2NormalizationFloat32Workload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+#include "TensorBufferArrayView.hpp"
+
+#include "Profiling.hpp"
+
+#include <cmath>
+
+namespace armnn
+{
+
+void RefL2NormalizationFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefL2NormalizationFloat32Workload_Execute");
+
+ const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+ TensorBufferArrayView<const float> input(inputInfo.GetShape(), GetInputTensorDataFloat(0, m_Data));
+ TensorBufferArrayView<float> output(outputInfo.GetShape(), GetOutputTensorDataFloat(0, m_Data));
+
+ const unsigned int batchSize = inputInfo.GetShape()[0];
+ const unsigned int depth = inputInfo.GetShape()[1];
+ const unsigned int rows = inputInfo.GetShape()[2];
+ const unsigned int cols = inputInfo.GetShape()[3];
+
+ for (unsigned int n = 0; n < batchSize; ++n)
+ {
+ for (unsigned int d = 0; d < depth; ++d)
+ {
+ for (unsigned int h = 0; h < rows; ++h)
+ {
+ for (unsigned int w = 0; w < cols; ++w)
+ {
+ float reduction = 0.0;
+ for (unsigned int c = 0; c < depth; ++c)
+ {
+ const float value = input.Get(n, c, h, w);
+ reduction += value * value;
+ }
+
+ // Using std::max(reduction, epsilon) below would guard against division by 0.
+ // However, at the time of writing:
+ // - This is not supported by the ACL functions used to implement L2Normalization in the CL
+ // backend.
+ // - The reference semantics for this operator do not include this parameter.
+ const float scale = 1.0f / sqrtf(reduction);
+ output.Get(n, d, h, w) = input.Get(n, d, h, w) * scale;
+ }
+ }
+ }
+ }
+}
+
+} //namespace armnn
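
The nested loop above boils down to one operation per (n, h, w) location: scale the channel vector by the reciprocal of its L2 norm. A condensed sketch of that inner step follows; as the comment in the workload notes, no epsilon guards the division.

    #include <cmath>
    #include <cstddef>

    // For a single spatial location: out[c] = in[c] / sqrt(sum_k in[k]^2).
    void L2NormalizeChannels(const float* in, float* out, size_t channels)
    {
        float sumOfSquares = 0.0f;
        for (size_t c = 0; c < channels; ++c)
        {
            sumOfSquares += in[c] * in[c];
        }
        const float scale = 1.0f / std::sqrt(sumOfSquares);
        for (size_t c = 0; c < channels; ++c)
        {
            out[c] = in[c] * scale;
        }
    }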
diff --git a/src/backends/reference/workloads/RefL2NormalizationFloat32Workload.hpp b/src/backends/reference/workloads/RefL2NormalizationFloat32Workload.hpp
new file mode 100644
index 0000000000..a3f03f3060
--- /dev/null
+++ b/src/backends/reference/workloads/RefL2NormalizationFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefL2NormalizationFloat32Workload : public Float32Workload<L2NormalizationQueueDescriptor>
+{
+public:
+ using Float32Workload<L2NormalizationQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefLstmFloat32Workload.cpp b/src/backends/reference/workloads/RefLstmFloat32Workload.cpp
new file mode 100644
index 0000000000..50ff605701
--- /dev/null
+++ b/src/backends/reference/workloads/RefLstmFloat32Workload.cpp
@@ -0,0 +1,16 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefLstmFloat32Workload.hpp"
+
+namespace armnn
+{
+
+void RefLstmFloat32Workload::Execute() const
+{
+ throw armnn::Exception("No implementation of Lstm in the Ref backend!");
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefLstmFloat32Workload.hpp b/src/backends/reference/workloads/RefLstmFloat32Workload.hpp
new file mode 100644
index 0000000000..fc4f7776c6
--- /dev/null
+++ b/src/backends/reference/workloads/RefLstmFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefLstmFloat32Workload : public Float32Workload<LstmQueueDescriptor>
+{
+public:
+ using Float32Workload<LstmQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefMergerFloat32Workload.cpp b/src/backends/reference/workloads/RefMergerFloat32Workload.cpp
new file mode 100644
index 0000000000..b1f8a32ee7
--- /dev/null
+++ b/src/backends/reference/workloads/RefMergerFloat32Workload.cpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefMergerFloat32Workload.hpp"
+
+#include "Merger.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefMergerFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMergerFloat32Workload_Execute");
+ Merger<float>(m_Data);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefMergerFloat32Workload.hpp b/src/backends/reference/workloads/RefMergerFloat32Workload.hpp
new file mode 100644
index 0000000000..23a523c852
--- /dev/null
+++ b/src/backends/reference/workloads/RefMergerFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefMergerFloat32Workload : public Float32Workload<MergerQueueDescriptor>
+{
+public:
+ using Float32Workload<MergerQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefMergerUint8Workload.cpp b/src/backends/reference/workloads/RefMergerUint8Workload.cpp
new file mode 100644
index 0000000000..47ce1cf731
--- /dev/null
+++ b/src/backends/reference/workloads/RefMergerUint8Workload.cpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefMergerUint8Workload.hpp"
+
+#include "Merger.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefMergerUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMergerUint8Workload_Execute");
+ Merger<uint8_t>(m_Data);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefMergerUint8Workload.hpp b/src/backends/reference/workloads/RefMergerUint8Workload.hpp
new file mode 100644
index 0000000000..65dc42120a
--- /dev/null
+++ b/src/backends/reference/workloads/RefMergerUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefMergerUint8Workload : public Uint8Workload<MergerQueueDescriptor>
+{
+public:
+ using Uint8Workload<MergerQueueDescriptor>::Uint8Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefNormalizationFloat32Workload.cpp b/src/backends/reference/workloads/RefNormalizationFloat32Workload.cpp
new file mode 100644
index 0000000000..5c24416624
--- /dev/null
+++ b/src/backends/reference/workloads/RefNormalizationFloat32Workload.cpp
@@ -0,0 +1,185 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefNormalizationFloat32Workload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <armnn/Tensor.hpp>
+
+#include <boost/log/trivial.hpp>
+#include <boost/numeric/conversion/cast.hpp>
+
+namespace armnn
+{
+
+// Helper function to compute "Within" normalization using Krizhevsky 2012: Local Brightness Normalization.
+static void NormalizeWithinUingLbr(const float* inputData,
+ float* outputData,
+ const TensorShape& tensorShape,
+ uint32_t norm_size,
+ float alpha,
+ float beta,
+ float kappa)
+{
+ const unsigned int batchSize = tensorShape[0];
+ const unsigned int depth = tensorShape[1];
+ const unsigned int rows = tensorShape[2];
+ const unsigned int cols = tensorShape[3];
+
+ int radius = boost::numeric_cast<int>(norm_size / 2u); /* Integer division truncates; assumes norm_size is odd so the window stays symmetric. */
+
+ for (unsigned int n = 0; n < batchSize; n++)
+ {
+ for (unsigned int c = 0; c < depth; c++)
+ {
+ for (unsigned int h = 0; h < rows; h++)
+ {
+ for (unsigned int w = 0; w < cols; w++)
+ {
+ float accumulated_scale = 0.0;
+ for (int y = -radius; y <= radius; y++)
+ {
+ for (int x = -radius; x <= radius; x++)
+ {
+ int i = boost::numeric_cast<int>(w) + x;
+ int j = boost::numeric_cast<int>(h) + y;
+
+ if ((i < 0) || (i >= boost::numeric_cast<int>(cols)))
+ {
+ continue;
+ }
+
+ if ((j < 0) || (j >= boost::numeric_cast<int>(rows)))
+ {
+ continue;
+ }
+
+ float inval = inputData[n * cols * rows * depth +
+ c * cols * rows +
+ boost::numeric_cast<unsigned int>(j) * cols +
+ boost::numeric_cast<unsigned int>(i)];
+
+ accumulated_scale += inval*inval;
+ }
+ }
+ outputData[n * cols * rows * depth +
+ c * cols * rows +
+ h * cols +
+ w] = inputData[n * cols * rows * depth +
+ c * cols * rows +
+ h * cols +
+ w] / (powf((kappa + (accumulated_scale * alpha)), beta));
+ }
+ }
+ }
+ }
+}
+
+// Helper function to compute "Across" normalization using Krizhevsky 2012: Local Brightness Normalization.
+void NormalizeAcrossUingLbr(const float* inputData,
+ float* outputData,
+ const TensorShape& tensorShape,
+ uint32_t norm_size,
+ float alpha,
+ float beta,
+ float kappa)
+{
+ const unsigned int batchSize = tensorShape[0];
+ const unsigned int depth = tensorShape[1];
+ const unsigned int rows = tensorShape[2];
+ const unsigned int cols = tensorShape[3];
+
+ int radius = boost::numeric_cast<int>(norm_size / 2u); /* Integer division truncates; assumes norm_size is odd so the window stays symmetric. */
+
+ for (unsigned int n = 0; n < batchSize; n++)
+ {
+ for (unsigned int c = 0; c < depth; c++)
+ {
+ for (unsigned int h = 0; h < rows; h++)
+ {
+ for (unsigned int w = 0; w < cols; w++)
+ {
+ float accumulated_scale = 0.0;
+ for (int z = -radius; z <= radius; z++)
+ {
+ int k = boost::numeric_cast<int>(c) + z;
+
+ if ((k < 0) || (k >= boost::numeric_cast<int>(depth)))
+ {
+ continue;
+ }
+
+ float inval = inputData[n * cols * rows * depth +
+ boost::numeric_cast<unsigned int>(k) * cols * rows +
+ h * cols +
+ w];
+
+ accumulated_scale += inval*inval;
+ }
+ float scale = kappa + (accumulated_scale * alpha);
+ scale = powf(scale, -beta);
+ outputData[n * cols * rows * depth +
+ c * cols * rows +
+ h * cols +
+ w] = scale *
+ inputData[n * cols * rows * depth +
+ c * cols * rows +
+ h * cols +
+ w];
+ }
+ }
+ }
+ }
+}
+
+void RefNormalizationFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefNormalizationFloat32Workload_Execute");
+
+ const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+
+ float* outputData = GetOutputTensorDataFloat(0, m_Data);
+ const float* inputData = GetInputTensorDataFloat(0, m_Data);
+
+
+ if (NormalizationAlgorithmMethod::LocalBrightness == m_Data.m_Parameters.m_NormMethodType)
+ {
+ if (NormalizationAlgorithmChannel::Within == m_Data.m_Parameters.m_NormChannelType)
+ {
+ NormalizeWithinUingLbr(inputData,
+ outputData,
+ inputInfo.GetShape(),
+ m_Data.m_Parameters.m_NormSize,
+ m_Data.m_Parameters.m_Alpha,
+ m_Data.m_Parameters.m_Beta,
+ m_Data.m_Parameters.m_K);
+ }
+ else if (NormalizationAlgorithmChannel::Across == m_Data.m_Parameters.m_NormChannelType)
+ {
+ NormalizeAcrossUingLbr(inputData,
+ outputData,
+ inputInfo.GetShape(),
+ m_Data.m_Parameters.m_NormSize,
+ m_Data.m_Parameters.m_Alpha,
+ m_Data.m_Parameters.m_Beta,
+ m_Data.m_Parameters.m_K);
+ }
+ else
+ {
+ BOOST_LOG_TRIVIAL(warning) << "Unsupported NormalizationAlgorithmChannel in RefNormalizationFloat32Workload";
+ return;
+ }
+ }
+ else
+ {
+ BOOST_LOG_TRIVIAL(warning) << "Lcr method (Jarrett 2009: Local Contrast Normalization) not supported yet.";
+ return;
+ }
+}
+
+} //namespace armnn
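
For the "Across" case, each output element is the input divided by (kappa + alpha * sum)^beta, where the sum runs over the squared activations of the channels within norm_size/2 on either side. A condensed per-element sketch of that window follows; it mirrors the loop above but is an illustration, not a drop-in replacement.

    #include <cmath>
    #include <cstdint>

    // channelData points at the depth values for one (n, h, w) location; c is the channel
    // being normalized. Edge channels simply use a smaller window.
    float LocalResponseNormalizeAcross(const float* channelData, uint32_t depth, uint32_t c,
                                       uint32_t normSize, float alpha, float beta, float kappa)
    {
        const int radius = static_cast<int>(normSize / 2u);
        float accumulated = 0.0f;
        for (int z = -radius; z <= radius; ++z)
        {
            const int k = static_cast<int>(c) + z;
            if (k < 0 || k >= static_cast<int>(depth))
            {
                continue;
            }
            accumulated += channelData[k] * channelData[k];
        }
        return channelData[c] * std::pow(kappa + alpha * accumulated, -beta);
    }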
diff --git a/src/backends/reference/workloads/RefNormalizationFloat32Workload.hpp b/src/backends/reference/workloads/RefNormalizationFloat32Workload.hpp
new file mode 100644
index 0000000000..e30356c422
--- /dev/null
+++ b/src/backends/reference/workloads/RefNormalizationFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefNormalizationFloat32Workload : public Float32Workload<NormalizationQueueDescriptor>
+{
+public:
+ using Float32Workload<NormalizationQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefPermuteWorkload.cpp b/src/backends/reference/workloads/RefPermuteWorkload.cpp
new file mode 100644
index 0000000000..4093ff38f4
--- /dev/null
+++ b/src/backends/reference/workloads/RefPermuteWorkload.cpp
@@ -0,0 +1,32 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefPermuteWorkload.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include <Permute.hpp>
+#include "TypeUtils.hpp"
+
+namespace armnn
+{
+
+template <armnn::DataType DataType>
+void RefPermuteWorkload<DataType>::Execute() const
+{
+ using T = ResolveType<DataType>;
+
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, GetName() + "_Execute");
+
+ const ITensorHandle* src = m_Data.m_Inputs[0];
+ const ITensorHandle* dst = m_Data.m_Outputs[0];
+ const PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
+
+ armnnUtils::Permute(GetTensorInfo(dst).GetShape(), mappings, GetConstCpuData<T>(src), GetCpuData<T>(dst));
+}
+
+template class RefPermuteWorkload<DataType::Float32>;
+template class RefPermuteWorkload<DataType::QuantisedAsymm8>;
+
+} //namespace armnn
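
The two "template class ..." lines at the bottom are explicit instantiations: the member definition lives in this .cpp file, and only the Float32 and QuantisedAsymm8 specialisations are emitted. Here is a standalone sketch of the same idiom with a hypothetical Workload template, showing why the instantiation lines are needed to avoid linker errors.

    #include <cstdio>

    template <int Tag>
    struct Workload
    {
        void Execute() const;
    };

    // Member defined out of line, in the .cpp, so the header stays lightweight.
    template <int Tag>
    void Workload<Tag>::Execute() const
    {
        std::printf("Executing workload with tag %d\n", Tag);
    }

    // Without these explicit instantiations, callers of the header would hit linker errors.
    template struct Workload<0>;  // analogous to RefPermuteWorkload<DataType::Float32>
    template struct Workload<1>;  // analogous to RefPermuteWorkload<DataType::QuantisedAsymm8>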
diff --git a/src/backends/reference/workloads/RefPermuteWorkload.hpp b/src/backends/reference/workloads/RefPermuteWorkload.hpp
new file mode 100644
index 0000000000..d72cf77e74
--- /dev/null
+++ b/src/backends/reference/workloads/RefPermuteWorkload.hpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+
+#include <armnn/TypesUtils.hpp>
+
+namespace armnn
+{
+
+template <armnn::DataType DataType>
+class RefPermuteWorkload : public TypedWorkload<PermuteQueueDescriptor, DataType>
+{
+public:
+ static const std::string& GetName()
+ {
+ static const std::string name = std::string("RefPermute") + GetDataTypeName(DataType) + "Workload";
+ return name;
+ }
+
+ using TypedWorkload<PermuteQueueDescriptor, DataType>::m_Data;
+ using TypedWorkload<PermuteQueueDescriptor, DataType>::TypedWorkload;
+ void Execute() const override;
+};
+
+using RefPermuteFloat32Workload = RefPermuteWorkload<DataType::Float32>;
+using RefPermuteUint8Workload = RefPermuteWorkload<DataType::QuantisedAsymm8>;
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefPooling2dFloat32Workload.cpp b/src/backends/reference/workloads/RefPooling2dFloat32Workload.cpp
new file mode 100644
index 0000000000..2542756c26
--- /dev/null
+++ b/src/backends/reference/workloads/RefPooling2dFloat32Workload.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefPooling2dFloat32Workload.hpp"
+
+#include "Pooling2d.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefPooling2dFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPooling2dFloat32Workload_Execute");
+
+ const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& outputInfo0 = GetTensorInfo(m_Data.m_Outputs[0]);
+
+ float* outputData = GetOutputTensorDataFloat(0, m_Data);
+ const float* inputData = GetInputTensorDataFloat(0, m_Data);
+
+ Pooling2d(inputData,
+ outputData,
+ inputInfo0,
+ outputInfo0,
+ m_Data.m_Parameters);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefPooling2dFloat32Workload.hpp b/src/backends/reference/workloads/RefPooling2dFloat32Workload.hpp
new file mode 100644
index 0000000000..501fb71aff
--- /dev/null
+++ b/src/backends/reference/workloads/RefPooling2dFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefPooling2dFloat32Workload : public Float32Workload<Pooling2dQueueDescriptor>
+{
+public:
+ using Float32Workload<Pooling2dQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefPooling2dUint8Workload.cpp b/src/backends/reference/workloads/RefPooling2dUint8Workload.cpp
new file mode 100644
index 0000000000..91fdf291ee
--- /dev/null
+++ b/src/backends/reference/workloads/RefPooling2dUint8Workload.cpp
@@ -0,0 +1,37 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefPooling2dUint8Workload.hpp"
+
+#include "Pooling2d.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <vector>
+
+namespace armnn
+{
+
+void RefPooling2dUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPooling2dUint8Workload_Execute");
+
+ const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+ auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo);
+
+ std::vector<float> results(outputInfo.GetNumElements());
+ Pooling2d(dequant.data(),
+ results.data(),
+ inputInfo,
+ outputInfo,
+ m_Data.m_Parameters);
+
+ Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefPooling2dUint8Workload.hpp b/src/backends/reference/workloads/RefPooling2dUint8Workload.hpp
new file mode 100644
index 0000000000..6544f9a785
--- /dev/null
+++ b/src/backends/reference/workloads/RefPooling2dUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefPooling2dUint8Workload : public Uint8Workload<Pooling2dQueueDescriptor>
+{
+public:
+ using Uint8Workload<Pooling2dQueueDescriptor>::Uint8Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefReshapeFloat32Workload.cpp b/src/backends/reference/workloads/RefReshapeFloat32Workload.cpp
new file mode 100644
index 0000000000..99c94a49a1
--- /dev/null
+++ b/src/backends/reference/workloads/RefReshapeFloat32Workload.cpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefReshapeFloat32Workload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <cstring>
+
+namespace armnn
+{
+
+void RefReshapeFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReshapeFloat32Workload_Execute");
+
+ void* output = GetOutputTensorData<void>(0, m_Data);
+ const void* input = GetInputTensorData<void>(0, m_Data);
+ unsigned int numBytes = GetTensorInfo(m_Data.m_Inputs[0]).GetNumBytes();
+ memcpy(output, input, numBytes);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefReshapeFloat32Workload.hpp b/src/backends/reference/workloads/RefReshapeFloat32Workload.hpp
new file mode 100644
index 0000000000..9281e89cf7
--- /dev/null
+++ b/src/backends/reference/workloads/RefReshapeFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefReshapeFloat32Workload : public Float32Workload<ReshapeQueueDescriptor>
+{
+public:
+ using Float32Workload<ReshapeQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefReshapeUint8Workload.cpp b/src/backends/reference/workloads/RefReshapeUint8Workload.cpp
new file mode 100644
index 0000000000..8f475f3db3
--- /dev/null
+++ b/src/backends/reference/workloads/RefReshapeUint8Workload.cpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefReshapeUint8Workload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <cstring>
+
+namespace armnn
+{
+
+void RefReshapeUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReshapeUint8Workload_Execute");
+
+ void* output = GetOutputTensorData<void>(0, m_Data);
+ const void* input = GetInputTensorData<void>(0, m_Data);
+ unsigned int numBytes = GetTensorInfo(m_Data.m_Inputs[0]).GetNumBytes();
+ memcpy(output, input, numBytes);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefReshapeUint8Workload.hpp b/src/backends/reference/workloads/RefReshapeUint8Workload.hpp
new file mode 100644
index 0000000000..b37fb4bdeb
--- /dev/null
+++ b/src/backends/reference/workloads/RefReshapeUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefReshapeUint8Workload : public Uint8Workload<ReshapeQueueDescriptor>
+{
+public:
+ using Uint8Workload<ReshapeQueueDescriptor>::Uint8Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefResizeBilinearFloat32Workload.cpp b/src/backends/reference/workloads/RefResizeBilinearFloat32Workload.cpp
new file mode 100644
index 0000000000..50ee7a218a
--- /dev/null
+++ b/src/backends/reference/workloads/RefResizeBilinearFloat32Workload.cpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefResizeBilinearFloat32Workload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+#include "ResizeBilinear.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefResizeBilinearFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefResizeBilinearFloat32Workload_Execute");
+
+ const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+ ResizeBilinear(GetInputTensorDataFloat(0, m_Data),
+ inputInfo,
+ GetOutputTensorDataFloat(0, m_Data),
+ outputInfo);
+}
+
+} //namespace armnn
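
ResizeBilinear maps each output pixel to a fractional source coordinate and blends the four surrounding input pixels. Below is a self-contained sketch of that interpolation for a single-channel image; the exact coordinate transform used by the Ref implementation (for example how edges are handled) is not shown here, so BilinearSample is illustrative only.

    #include <cstddef>

    // Samples image at fractional coordinate (x, y), assuming 0 <= x < width and 0 <= y < height.
    float BilinearSample(const float* image, size_t width, size_t height, float x, float y)
    {
        const size_t x0 = static_cast<size_t>(x);
        const size_t y0 = static_cast<size_t>(y);
        const size_t x1 = (x0 + 1 < width)  ? x0 + 1 : x0;  // clamp at the right edge
        const size_t y1 = (y0 + 1 < height) ? y0 + 1 : y0;  // clamp at the bottom edge
        const float fx = x - static_cast<float>(x0);
        const float fy = y - static_cast<float>(y0);

        const float topLeft     = image[y0 * width + x0];
        const float topRight    = image[y0 * width + x1];
        const float bottomLeft  = image[y1 * width + x0];
        const float bottomRight = image[y1 * width + x1];

        const float top    = topLeft + fx * (topRight - topLeft);
        const float bottom = bottomLeft + fx * (bottomRight - bottomLeft);
        return top + fy * (bottom - top);
    }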
diff --git a/src/backends/reference/workloads/RefResizeBilinearFloat32Workload.hpp b/src/backends/reference/workloads/RefResizeBilinearFloat32Workload.hpp
new file mode 100644
index 0000000000..0fff7ee695
--- /dev/null
+++ b/src/backends/reference/workloads/RefResizeBilinearFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefResizeBilinearFloat32Workload : public Float32Workload<ResizeBilinearQueueDescriptor>
+{
+public:
+ using Float32Workload<ResizeBilinearQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefResizeBilinearUint8Workload.cpp b/src/backends/reference/workloads/RefResizeBilinearUint8Workload.cpp
new file mode 100644
index 0000000000..67ab039ef3
--- /dev/null
+++ b/src/backends/reference/workloads/RefResizeBilinearUint8Workload.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefResizeBilinearUint8Workload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+#include "ResizeBilinear.hpp"
+
+#include "Profiling.hpp"
+
+#include <vector>
+
+namespace armnn
+{
+
+void RefResizeBilinearUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefResizeBilinearUint8Workload_Execute");
+
+ const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+ auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo);
+
+ std::vector<float> results(outputInfo.GetNumElements());
+ ResizeBilinear(dequant.data(), inputInfo, results.data(), outputInfo);
+
+ Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefResizeBilinearUint8Workload.hpp b/src/backends/reference/workloads/RefResizeBilinearUint8Workload.hpp
new file mode 100644
index 0000000000..bbaf899ca6
--- /dev/null
+++ b/src/backends/reference/workloads/RefResizeBilinearUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefResizeBilinearUint8Workload : public Uint8Workload<ResizeBilinearQueueDescriptor>
+{
+public:
+ using Uint8Workload<ResizeBilinearQueueDescriptor>::Uint8Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefSoftmaxFloat32Workload.cpp b/src/backends/reference/workloads/RefSoftmaxFloat32Workload.cpp
new file mode 100644
index 0000000000..1f519bda10
--- /dev/null
+++ b/src/backends/reference/workloads/RefSoftmaxFloat32Workload.cpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefSoftmaxFloat32Workload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+#include "Softmax.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefSoftmaxFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSoftmaxFloat32Workload_Execute");
+
+ Softmax(GetInputTensorDataFloat(0, m_Data),
+ GetOutputTensorDataFloat(0, m_Data),
+ GetTensorInfo(m_Data.m_Inputs[0]),
+ m_Data.m_Parameters.m_Beta);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefSoftmaxFloat32Workload.hpp b/src/backends/reference/workloads/RefSoftmaxFloat32Workload.hpp
new file mode 100644
index 0000000000..d37f2b5990
--- /dev/null
+++ b/src/backends/reference/workloads/RefSoftmaxFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefSoftmaxFloat32Workload : public Float32Workload<SoftmaxQueueDescriptor>
+{
+public:
+ using Float32Workload<SoftmaxQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefSoftmaxUint8Workload.cpp b/src/backends/reference/workloads/RefSoftmaxUint8Workload.cpp
new file mode 100644
index 0000000000..17114ec83a
--- /dev/null
+++ b/src/backends/reference/workloads/RefSoftmaxUint8Workload.cpp
@@ -0,0 +1,36 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefSoftmaxUint8Workload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+#include "Softmax.hpp"
+
+#include "Profiling.hpp"
+
+#include <vector>
+
+namespace armnn
+{
+
+void RefSoftmaxUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSoftmaxUint8Workload_Execute");
+
+ const TensorInfo& tensorInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+
+ auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), tensorInfo);
+
+ std::vector<float> results(tensorInfo.GetNumElements());
+
+ Softmax(dequant.data(),
+ results.data(),
+ tensorInfo,
+ m_Data.m_Parameters.m_Beta);
+
+ Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), GetTensorInfo(m_Data.m_Outputs[0]));
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefSoftmaxUint8Workload.hpp b/src/backends/reference/workloads/RefSoftmaxUint8Workload.hpp
new file mode 100644
index 0000000000..b179d529da
--- /dev/null
+++ b/src/backends/reference/workloads/RefSoftmaxUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefSoftmaxUint8Workload : public Uint8Workload<SoftmaxQueueDescriptor>
+{
+public:
+ using Uint8Workload<SoftmaxQueueDescriptor>::Uint8Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefSplitterFloat32Workload.cpp b/src/backends/reference/workloads/RefSplitterFloat32Workload.cpp
new file mode 100644
index 0000000000..75611dacf3
--- /dev/null
+++ b/src/backends/reference/workloads/RefSplitterFloat32Workload.cpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefSplitterFloat32Workload.hpp"
+
+#include "Splitter.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefSplitterFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSplitterFloat32Workload_Execute");
+ Splitter<float>(m_Data);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefSplitterFloat32Workload.hpp b/src/backends/reference/workloads/RefSplitterFloat32Workload.hpp
new file mode 100644
index 0000000000..12176dd277
--- /dev/null
+++ b/src/backends/reference/workloads/RefSplitterFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefSplitterFloat32Workload : public Float32Workload<SplitterQueueDescriptor>
+{
+public:
+ using Float32Workload<SplitterQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefSplitterUint8Workload.cpp b/src/backends/reference/workloads/RefSplitterUint8Workload.cpp
new file mode 100644
index 0000000000..ca9f5db850
--- /dev/null
+++ b/src/backends/reference/workloads/RefSplitterUint8Workload.cpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefSplitterUint8Workload.hpp"
+
+#include "Splitter.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefSplitterUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSplitterUint8Workload_Execute");
+ Splitter<uint8_t>(m_Data);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefSplitterUint8Workload.hpp b/src/backends/reference/workloads/RefSplitterUint8Workload.hpp
new file mode 100644
index 0000000000..e80cb1a654
--- /dev/null
+++ b/src/backends/reference/workloads/RefSplitterUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefSplitterUint8Workload : public Uint8Workload<SplitterQueueDescriptor>
+{
+public:
+ using Uint8Workload<SplitterQueueDescriptor>::Uint8Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefWorkloadUtils.hpp b/src/backends/reference/workloads/RefWorkloadUtils.hpp
new file mode 100644
index 0000000000..616a875028
--- /dev/null
+++ b/src/backends/reference/workloads/RefWorkloadUtils.hpp
@@ -0,0 +1,138 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/CpuTensorHandle.hpp"
+
+#include <armnn/Tensor.hpp>
+#include <armnn/Types.hpp>
+#include <Half.hpp>
+
+#include <boost/polymorphic_cast.hpp>
+
+namespace armnn
+{
+
+////////////////////////////////////////////
+/// float32 helpers
+////////////////////////////////////////////
+
+inline const TensorInfo& GetTensorInfo(const ITensorHandle* tensorHandle)
+{
+ // We know that reference workloads use CpuTensorHandles only, so this cast is legitimate.
+ const ConstCpuTensorHandle* cpuTensorHandle =
+ boost::polymorphic_downcast<const ConstCpuTensorHandle*>(tensorHandle);
+ return cpuTensorHandle->GetTensorInfo();
+}
+
+template <typename DataType>
+inline const DataType* GetConstCpuData(const ITensorHandle* tensorHandle)
+{
+ // We know that reference workloads use (Const)CpuTensorHandles only, so this cast is legitimate.
+ const ConstCpuTensorHandle* cpuTensorHandle =
+ boost::polymorphic_downcast<const ConstCpuTensorHandle*>(tensorHandle);
+ return cpuTensorHandle->GetConstTensor<DataType>();
+}
+
+template <typename DataType>
+inline DataType* GetCpuData(const ITensorHandle* tensorHandle)
+{
+ // We know that reference workloads use CpuTensorHandles only, so this cast is legitimate.
+ const CpuTensorHandle* cpuTensorHandle = boost::polymorphic_downcast<const CpuTensorHandle*>(tensorHandle);
+ return cpuTensorHandle->GetTensor<DataType>();
+}
+
+template <typename DataType, typename PayloadType>
+const DataType* GetInputTensorData(unsigned int idx, const PayloadType& data)
+{
+ const ITensorHandle* tensorHandle = data.m_Inputs[idx];
+ return GetConstCpuData<DataType>(tensorHandle);
+}
+
+template <typename DataType, typename PayloadType>
+DataType* GetOutputTensorData(unsigned int idx, const PayloadType& data)
+{
+ const ITensorHandle* tensorHandle = data.m_Outputs[idx];
+ return GetCpuData<DataType>(tensorHandle);
+}
+
+template <typename PayloadType>
+const float* GetInputTensorDataFloat(unsigned int idx, const PayloadType& data)
+{
+ return GetInputTensorData<float>(idx, data);
+}
+
+template <typename PayloadType>
+float* GetOutputTensorDataFloat(unsigned int idx, const PayloadType& data)
+{
+ return GetOutputTensorData<float>(idx, data);
+}
+
+template <typename PayloadType>
+const Half* GetInputTensorDataHalf(unsigned int idx, const PayloadType& data)
+{
+ return GetInputTensorData<Half>(idx, data);
+}
+
+template <typename PayloadType>
+Half* GetOutputTensorDataHalf(unsigned int idx, const PayloadType& data)
+{
+ return GetOutputTensorData<Half>(idx, data);
+}
+
+////////////////////////////////////////////
+/// u8 helpers
+////////////////////////////////////////////
+
+inline const uint8_t* GetConstCpuU8Data(const ITensorHandle* tensorHandle)
+{
+ // We know that reference workloads use (Const)CpuTensorHandles only, so this cast is legitimate.
+ const ConstCpuTensorHandle* cpuTensorHandle =
+ boost::polymorphic_downcast<const ConstCpuTensorHandle*>(tensorHandle);
+ return cpuTensorHandle->GetConstTensor<uint8_t>();
+}
+
+inline uint8_t* GetCpuU8Data(const ITensorHandle* tensorHandle)
+{
+ // We know that reference workloads use CpuTensorHandles only, so this cast is legitimate.
+ const CpuTensorHandle* cpuTensorHandle = boost::polymorphic_downcast<const CpuTensorHandle*>(tensorHandle);
+ return cpuTensorHandle->GetTensor<uint8_t>();
+}
+
+template <typename PayloadType>
+const uint8_t* GetInputTensorDataU8(unsigned int idx, const PayloadType& data)
+{
+ const ITensorHandle* tensorHandle = data.m_Inputs[idx];
+ return GetConstCpuU8Data(tensorHandle);
+}
+
+template <typename PayloadType>
+uint8_t* GetOutputTensorDataU8(unsigned int idx, const PayloadType& data)
+{
+ const ITensorHandle* tensorHandle = data.m_Outputs[idx];
+ return GetCpuU8Data(tensorHandle);
+}
+
+template<typename T>
+std::vector<float> Dequantize(const T* quant, const TensorInfo& info)
+{
+ std::vector<float> ret(info.GetNumElements());
+ for (size_t i = 0; i < info.GetNumElements(); i++)
+ {
+ ret[i] = armnn::Dequantize(quant[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
+ }
+ return ret;
+}
+
+inline void Quantize(uint8_t* quant, const float* dequant, const TensorInfo& info)
+{
+ for (size_t i = 0; i < info.GetNumElements(); i++)
+ {
+ quant[i] = armnn::Quantize<uint8_t>(dequant[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
+ }
+}
+
+} //namespace armnn
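The Dequantize and Quantize helpers above implement the usual affine mapping between uint8 and float: real = scale * (quantized - offset) and quantized = clamp(round(real / scale) + offset, 0, 255). The sketch below is a minimal standalone illustration of that round trip; the scale and offset values are arbitrary examples, and the clamping is written out explicitly rather than relying on the armnn helpers so the snippet compiles on its own.

// Minimal standalone sketch of the affine quantization round trip used by the
// helpers above. The scale and offset values are illustrative only.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    const float   scale  = 0.1f;  // example quantization scale
    const int32_t offset = 128;   // example zero point

    const std::vector<uint8_t> quant = { 0, 100, 128, 200, 255 };

    // Dequantize: real = scale * (q - offset).
    std::vector<float> real(quant.size());
    for (size_t i = 0; i < quant.size(); ++i)
    {
        real[i] = scale * (static_cast<int32_t>(quant[i]) - offset);
    }

    // Requantize: q = clamp(round(real / scale) + offset, 0, 255).
    for (size_t i = 0; i < real.size(); ++i)
    {
        int32_t q = static_cast<int32_t>(std::round(real[i] / scale)) + offset;
        q = std::min(255, std::max(0, q));
        std::cout << real[i] << " -> " << q << "\n"; // recovers the original uint8 value
    }
    return 0;
}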
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
new file mode 100644
index 0000000000..e5c6e1e9d5
--- /dev/null
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -0,0 +1,53 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "RefConstantUint8Workload.hpp"
+#include "ArithmeticFunction.hpp"
+#include "RefArithmeticWorkload.hpp"
+#include "ConvImpl.hpp"
+#include "RefBaseConstantWorkload.hpp"
+#include "RefConvolution2dUint8Workload.hpp"
+#include "RefSplitterUint8Workload.hpp"
+#include "RefResizeBilinearUint8Workload.hpp"
+#include "RefL2NormalizationFloat32Workload.hpp"
+#include "RefActivationUint8Workload.hpp"
+#include "RefPooling2dFloat32Workload.hpp"
+#include "RefWorkloadUtils.hpp"
+#include "RefMergerUint8Workload.hpp"
+#include "RefFullyConnectedFloat32Workload.hpp"
+#include "Softmax.hpp"
+#include "RefMergerFloat32Workload.hpp"
+#include "TensorBufferArrayView.hpp"
+#include "RefBatchNormalizationFloat32Workload.hpp"
+#include "Splitter.hpp"
+#include "RefFullyConnectedUint8Workload.hpp"
+#include "RefReshapeFloat32Workload.hpp"
+#include "RefDepthwiseConvolution2dUint8Workload.hpp"
+#include "FullyConnected.hpp"
+#include "RefFloorFloat32Workload.hpp"
+#include "RefSoftmaxFloat32Workload.hpp"
+#include "RefSoftmaxUint8Workload.hpp"
+#include "RefReshapeUint8Workload.hpp"
+#include "RefResizeBilinearFloat32Workload.hpp"
+#include "RefBatchNormalizationUint8Workload.hpp"
+#include "ResizeBilinear.hpp"
+#include "RefNormalizationFloat32Workload.hpp"
+#include "RefDepthwiseConvolution2dFloat32Workload.hpp"
+#include "RefPooling2dUint8Workload.hpp"
+#include "BatchNormImpl.hpp"
+#include "Activation.hpp"
+#include "Merger.hpp"
+#include "RefSplitterFloat32Workload.hpp"
+#include "RefConstantFloat32Workload.hpp"
+#include "RefActivationFloat32Workload.hpp"
+#include "RefConvolution2dFloat32Workload.hpp"
+#include "Pooling2d.hpp"
+#include "RefFakeQuantizationFloat32Workload.hpp"
+#include "RefPermuteWorkload.hpp"
+#include "RefLstmFloat32Workload.hpp"
+#include "RefConvertFp16ToFp32Workload.hpp"
+#include "RefConvertFp32ToFp16Workload.hpp"
diff --git a/src/backends/reference/workloads/ResizeBilinear.cpp b/src/backends/reference/workloads/ResizeBilinear.cpp
new file mode 100644
index 0000000000..0bce3c7ed8
--- /dev/null
+++ b/src/backends/reference/workloads/ResizeBilinear.cpp
@@ -0,0 +1,92 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ResizeBilinear.hpp"
+
+#include "TensorBufferArrayView.hpp"
+
+#include <boost/numeric/conversion/cast.hpp>
+
+#include <cmath>
+#include <algorithm>
+
+namespace armnn
+{
+
+namespace
+{
+
+inline float Lerp(float a, float b, float w)
+{
+ return w * b + (1.f - w) * a;
+}
+
+}
+
+void ResizeBilinear(const float* in, const TensorInfo& inputInfo, float* out, const TensorInfo& outputInfo)
+{
+    // We follow the definition of TensorFlow and AndroidNN: the top-left corner of a texel in the output
+    // image is projected into the input image to figure out the interpolants and weights. Note that this
+    // yields different results from projecting the centre of each output texel.
+
+ const unsigned int batchSize = inputInfo.GetShape()[0];
+ const unsigned int channelCount = inputInfo.GetShape()[1];
+
+ const unsigned int inputHeight = inputInfo.GetShape()[2];
+ const unsigned int inputWidth = inputInfo.GetShape()[3];
+ const unsigned int outputHeight = outputInfo.GetShape()[2];
+ const unsigned int outputWidth = outputInfo.GetShape()[3];
+
+ // How much to scale pixel coordinates in the output image, to get the corresponding pixel coordinates
+ // in the input image.
+ const float scaleY = boost::numeric_cast<float>(inputHeight) / boost::numeric_cast<float>(outputHeight);
+ const float scaleX = boost::numeric_cast<float>(inputWidth) / boost::numeric_cast<float>(outputWidth);
+
+ TensorBufferArrayView<const float> input(inputInfo.GetShape(), in);
+ TensorBufferArrayView<float> output(outputInfo.GetShape(), out);
+
+ for (unsigned int n = 0; n < batchSize; ++n)
+ {
+ for (unsigned int c = 0; c < channelCount; ++c)
+ {
+ for (unsigned int y = 0; y < outputHeight; ++y)
+ {
+ // Corresponding real-valued height coordinate in input image.
+ const float iy = boost::numeric_cast<float>(y) * scaleY;
+
+ // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation).
+ const float fiy = floorf(iy);
+ const unsigned int y0 = boost::numeric_cast<unsigned int>(fiy);
+
+ // Interpolation weight (range [0,1]).
+ const float yw = iy - fiy;
+
+ for (unsigned int x = 0; x < outputWidth; ++x)
+ {
+ // Real-valued and discrete width coordinates in input image.
+ const float ix = boost::numeric_cast<float>(x) * scaleX;
+ const float fix = floorf(ix);
+ const unsigned int x0 = boost::numeric_cast<unsigned int>(fix);
+
+ // Interpolation weight (range [0,1]).
+ const float xw = ix - fix;
+
+ // Discrete width/height coordinates of texels below and to the right of (x0, y0).
+ const unsigned int x1 = std::min(x0 + 1, inputWidth - 1u);
+ const unsigned int y1 = std::min(y0 + 1, inputHeight - 1u);
+
+ // Interpolation
+ const float ly0 = Lerp(input.Get(n, c, y0, x0), input.Get(n, c, y0, x1), xw); // lerp along row y0.
+ const float ly1 = Lerp(input.Get(n, c, y1, x0), input.Get(n, c, y1, x1), xw); // lerp along row y1.
+ const float l = Lerp(ly0, ly1, yw);
+
+ output.Get(n, c, y, x) = l;
+ }
+ }
+ }
+ }
+}
+
+} //namespace armnn
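As the comments above describe, each output texel's top-left corner is projected back into the input (scale = inputSize / outputSize), the fractional part of the projected coordinate becomes the interpolation weight, and the neighbouring coordinates are clamped at the border. Below is a minimal standalone sketch of the same convention for a single-channel image, without the NCHW bookkeeping; the 2x2 input values and the 4x4 output size are arbitrary examples.

// Minimal standalone single-channel bilinear resize using the same
// top-left-corner projection and border clamping as the code above.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

static float Lerp(float a, float b, float w)
{
    return w * b + (1.f - w) * a;
}

int main()
{
    const unsigned int inW = 2, inH = 2, outW = 4, outH = 4; // example sizes
    const std::vector<float> in = { 1.f, 2.f,
                                    3.f, 4.f };
    std::vector<float> out(outW * outH);

    const float scaleY = static_cast<float>(inH) / static_cast<float>(outH);
    const float scaleX = static_cast<float>(inW) / static_cast<float>(outW);

    for (unsigned int y = 0; y < outH; ++y)
    {
        const float iy = y * scaleY;                     // project output row into the input
        const unsigned int y0 = static_cast<unsigned int>(std::floor(iy));
        const unsigned int y1 = std::min(y0 + 1, inH - 1);
        const float yw = iy - std::floor(iy);            // vertical interpolation weight

        for (unsigned int x = 0; x < outW; ++x)
        {
            const float ix = x * scaleX;                 // project output column into the input
            const unsigned int x0 = static_cast<unsigned int>(std::floor(ix));
            const unsigned int x1 = std::min(x0 + 1, inW - 1);
            const float xw = ix - std::floor(ix);        // horizontal interpolation weight

            const float top    = Lerp(in[y0 * inW + x0], in[y0 * inW + x1], xw);
            const float bottom = Lerp(in[y1 * inW + x0], in[y1 * inW + x1], xw);
            out[y * outW + x]  = Lerp(top, bottom, yw);
        }
    }

    for (unsigned int y = 0; y < outH; ++y)
    {
        for (unsigned int x = 0; x < outW; ++x)
        {
            std::printf("%4.2f ", out[y * outW + x]);
        }
        std::printf("\n");
    }
    return 0;
}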
diff --git a/src/backends/reference/workloads/ResizeBilinear.hpp b/src/backends/reference/workloads/ResizeBilinear.hpp
new file mode 100644
index 0000000000..847b8e8bef
--- /dev/null
+++ b/src/backends/reference/workloads/ResizeBilinear.hpp
@@ -0,0 +1,15 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+void ResizeBilinear(const float* in, const TensorInfo& inputInfo, float* out, const TensorInfo& outputInfo);
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/Softmax.cpp b/src/backends/reference/workloads/Softmax.cpp
new file mode 100644
index 0000000000..4f1016e86c
--- /dev/null
+++ b/src/backends/reference/workloads/Softmax.cpp
@@ -0,0 +1,49 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "Softmax.hpp"
+
+#include <cmath>
+#include <vector>
+
+namespace armnn
+{
+
+/// Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo.
+void Softmax(const float* in, float* out, const TensorInfo& tensorInfo, float beta)
+{
+ unsigned int numChannels = tensorInfo.GetShape()[1];
+ for (unsigned int n = 0; n < tensorInfo.GetShape()[0]; n++)
+ {
+ // Find maximum channel.
+ float max = in[n * numChannels];
+ for (unsigned int c = 1; c < numChannels; c++)
+ {
+ float val = in[n * numChannels + c];
+ if (val > max)
+ {
+ max = val;
+ }
+ }
+
+ // Exponentiate all values and sum.
+ std::vector<float> exponentials(numChannels);
+ float sum = 0.0f;
+ for (unsigned int c = 0; c < numChannels; c++)
+ {
+ float val = in[n * numChannels + c];
+ exponentials[c] = expf((val - max) * beta);
+ sum += exponentials[c];
+ }
+
+ // Divide exponentials by sum to give outputs.
+ for (unsigned int c = 0; c < numChannels; c++)
+ {
+ out[n * numChannels + c] = exponentials[c] / sum;
+ }
+ }
+}
+
+} //namespace armnn
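The max-subtraction in Softmax above is the standard numerical-stability trick: for positive beta the shifted exponents are non-positive, so expf cannot overflow, and the common factor cancels in the division, leaving the result unchanged. A tiny standalone check of the same computation on one batch of three channels; the input values and beta are arbitrary examples.

// Standalone check of the stable softmax-with-beta computation above.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

int main()
{
    const std::vector<float> in = { 1.0f, 2.0f, 3.0f }; // one batch, three channels
    const float beta = 1.0f;

    // Find the maximum channel value, as the reference implementation does.
    const float max = *std::max_element(in.begin(), in.end());

    // Exponentiate the shifted values and accumulate their sum.
    std::vector<float> exponentials(in.size());
    float sum = 0.0f;
    for (size_t c = 0; c < in.size(); ++c)
    {
        exponentials[c] = std::exp((in[c] - max) * beta);
        sum += exponentials[c];
    }

    // Normalise. Prints roughly 0.0900, 0.2447, 0.6652; the outputs sum to 1.
    for (size_t c = 0; c < in.size(); ++c)
    {
        std::printf("%.4f\n", exponentials[c] / sum);
    }
    return 0;
}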
diff --git a/src/backends/reference/workloads/Softmax.hpp b/src/backends/reference/workloads/Softmax.hpp
new file mode 100644
index 0000000000..3b974f9e9e
--- /dev/null
+++ b/src/backends/reference/workloads/Softmax.hpp
@@ -0,0 +1,16 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+/// Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo.
+void Softmax(const float* in, float* out, const TensorInfo& tensorInfo, float beta);
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/Splitter.hpp b/src/backends/reference/workloads/Splitter.hpp
new file mode 100644
index 0000000000..e9c0379c9e
--- /dev/null
+++ b/src/backends/reference/workloads/Splitter.hpp
@@ -0,0 +1,84 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "RefWorkloadUtils.hpp"
+
+#include "backends/WorkloadData.hpp"
+
+#include <armnn/Tensor.hpp>
+
+#include <boost/assert.hpp>
+
+namespace armnn
+{
+
+template <typename DataType>
+void Splitter(const SplitterQueueDescriptor& data)
+{
+ const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]);
+
+ for (unsigned int index = 0; index < inputInfo0.GetNumElements(); ++index)
+ {
+ unsigned int indices[MaxNumOfTensorDimensions] = { 0 };
+
+ unsigned int indexRemainder = index;
+ unsigned int dimensionStride = inputInfo0.GetNumElements();
+
+ for (unsigned int i = 0; i<inputInfo0.GetNumDimensions(); i++)
+ {
+ dimensionStride /= inputInfo0.GetShape()[i];
+ indices[i] = indexRemainder / dimensionStride; // Use integer division to round down.
+ indexRemainder -= indices[i] * dimensionStride;
+ }
+
+ for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx)
+ {
+ SplitterQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
+
+            // Split view extents are defined by the size of (the corresponding) output tensor.
+ const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[viewIdx]);
+ BOOST_ASSERT(outputInfo.GetNumDimensions() == inputInfo0.GetNumDimensions());
+
+ // Check all dimensions to see if this element is inside the given input view.
+ bool insideView = true;
+ for (unsigned int i = 0; i<outputInfo.GetNumDimensions(); i++)
+ {
+ if (indices[i] < view.m_Origin[i])
+ {
+ insideView = false;
+ }
+ if (indices[i] >= view.m_Origin[i] + outputInfo.GetShape()[i])
+ {
+ insideView = false;
+ }
+ }
+
+ if (insideView)
+ {
+                unsigned int outIndex = 0;
+                unsigned int outputStride = 1;
+
+                for (unsigned int i = outputInfo.GetNumDimensions(); i-- > 0;)
+                {
+                    outIndex += outputStride * (indices[i] - view.m_Origin[i]);
+                    outputStride *= outputInfo.GetShape()[i];
+                }
+
+                // We are within the view, so copy the input data to the output corresponding to this view.
+ DataType* outputData = GetOutputTensorData<DataType>(viewIdx, data);
+ BOOST_ASSERT(outputData);
+
+ const DataType* inputData = GetInputTensorData<DataType>(0, data);
+ BOOST_ASSERT(inputData);
+
+ outputData[outIndex] = inputData[index];
+ }
+ }
+ }
+}
+
+} //namespace armnn
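The first loop in Splitter above converts a flat element index into per-dimension indices by dividing the remaining dimension stride out of the index, one dimension at a time. A standalone sketch of that decomposition; the shape {2, 3, 4} is an arbitrary example.

// Standalone sketch of the flat-index to per-dimension-index decomposition
// used by Splitter above. The shape {2, 3, 4} is an arbitrary example.
#include <cstdio>
#include <vector>

int main()
{
    const std::vector<unsigned int> shape = { 2, 3, 4 };

    unsigned int numElements = 1;
    for (unsigned int dim : shape)
    {
        numElements *= dim;
    }

    for (unsigned int index = 0; index < numElements; ++index)
    {
        std::vector<unsigned int> indices(shape.size());

        unsigned int indexRemainder  = index;
        unsigned int dimensionStride = numElements;
        for (size_t i = 0; i < shape.size(); ++i)
        {
            dimensionStride /= shape[i];
            indices[i] = indexRemainder / dimensionStride; // integer division rounds down
            indexRemainder -= indices[i] * dimensionStride;
        }

        std::printf("%2u -> [%u, %u, %u]\n", index, indices[0], indices[1], indices[2]);
    }
    return 0;
}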
diff --git a/src/backends/reference/workloads/TensorBufferArrayView.hpp b/src/backends/reference/workloads/TensorBufferArrayView.hpp
new file mode 100644
index 0000000000..e19810ca87
--- /dev/null
+++ b/src/backends/reference/workloads/TensorBufferArrayView.hpp
@@ -0,0 +1,44 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/Tensor.hpp>
+
+#include <boost/assert.hpp>
+
+namespace armnn
+{
+
+// Utility class providing access to raw tensor memory based on indices along each dimension.
+template <typename DataType>
+class TensorBufferArrayView
+{
+public:
+ TensorBufferArrayView(const TensorShape& shape, DataType* data)
+ : m_Shape(shape)
+ , m_Data(data)
+ {
+ }
+
+ DataType& Get(unsigned int b, unsigned int c, unsigned int h, unsigned int w) const
+ {
+ BOOST_ASSERT( b < m_Shape[0] || (m_Shape[0] == 0 && b == 0) );
+ BOOST_ASSERT( c < m_Shape[1] || (m_Shape[1] == 0 && c == 0) );
+ BOOST_ASSERT( h < m_Shape[2] || (m_Shape[2] == 0 && h == 0) );
+ BOOST_ASSERT( w < m_Shape[3] || (m_Shape[3] == 0 && w == 0) );
+
+ return m_Data[b * m_Shape[1] * m_Shape[2] * m_Shape[3]
+ + c * m_Shape[2] * m_Shape[3]
+ + h * m_Shape[3]
+ + w];
+ }
+
+private:
+ const TensorShape m_Shape;
+ DataType* m_Data;
+};
+
+} //namespace armnn
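TensorBufferArrayView::Get above maps (b, c, h, w) to a flat NCHW offset, b*C*H*W + c*H*W + h*W + w. The snippet below is a minimal standalone check of that arithmetic against a buffer filled with its own flat indices; the shape values are arbitrary examples.

// Standalone check of the NCHW offset arithmetic used by TensorBufferArrayView::Get.
#include <cassert>
#include <vector>

int main()
{
    const unsigned int N = 2, C = 3, H = 4, W = 5; // arbitrary example shape

    // Fill the buffer so each element stores its own flat index.
    std::vector<unsigned int> data(N * C * H * W);
    for (unsigned int i = 0; i < data.size(); ++i)
    {
        data[i] = i;
    }

    for (unsigned int b = 0; b < N; ++b)
    {
        for (unsigned int c = 0; c < C; ++c)
        {
            for (unsigned int h = 0; h < H; ++h)
            {
                for (unsigned int w = 0; w < W; ++w)
                {
                    // Same formula as Get(b, c, h, w) above.
                    const unsigned int offset = b * C * H * W + c * H * W + h * W + w;
                    assert(data[offset] == offset);
                }
            }
        }
    }
    return 0;
}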