Diffstat (limited to 'src/armnn/backends/RefWorkloads')
-rw-r--r-- src/armnn/backends/RefWorkloads/Activation.cpp | 91
-rw-r--r-- src/armnn/backends/RefWorkloads/Activation.hpp | 20
-rw-r--r-- src/armnn/backends/RefWorkloads/Addition.cpp | 44
-rw-r--r-- src/armnn/backends/RefWorkloads/Addition.hpp | 20
-rw-r--r-- src/armnn/backends/RefWorkloads/BatchNormImpl.hpp | 56
-rw-r--r-- src/armnn/backends/RefWorkloads/Broadcast.cpp | 33
-rw-r--r-- src/armnn/backends/RefWorkloads/Broadcast.hpp | 58
-rw-r--r-- src/armnn/backends/RefWorkloads/ConvImpl.cpp | 71
-rw-r--r-- src/armnn/backends/RefWorkloads/ConvImpl.hpp | 184
-rw-r--r-- src/armnn/backends/RefWorkloads/FullyConnected.cpp | 62
-rw-r--r-- src/armnn/backends/RefWorkloads/FullyConnected.hpp | 22
-rw-r--r-- src/armnn/backends/RefWorkloads/Merger.hpp | 81
-rw-r--r-- src/armnn/backends/RefWorkloads/Multiplication.cpp | 22
-rw-r--r-- src/armnn/backends/RefWorkloads/Multiplication.hpp | 16
-rw-r--r-- src/armnn/backends/RefWorkloads/Pooling2d.cpp | 241
-rw-r--r-- src/armnn/backends/RefWorkloads/Pooling2d.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefActivationFloat32Workload.cpp | 28
-rw-r--r-- src/armnn/backends/RefWorkloads/RefActivationFloat32Workload.hpp | 20
-rw-r--r-- src/armnn/backends/RefWorkloads/RefActivationUint8Workload.cpp | 38
-rw-r--r-- src/armnn/backends/RefWorkloads/RefActivationUint8Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefAdditionFloat32Workload.cpp | 31
-rw-r--r-- src/armnn/backends/RefWorkloads/RefAdditionFloat32Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefAdditionUint8Workload.cpp | 41
-rw-r--r-- src/armnn/backends/RefWorkloads/RefAdditionUint8Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.cpp | 49
-rw-r--r-- src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.hpp | 33
-rw-r--r-- src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp | 31
-rw-r--r-- src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp | 40
-rw-r--r-- src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefConstantFloat32Workload.cpp | 19
-rw-r--r-- src/armnn/backends/RefWorkloads/RefConstantFloat32Workload.hpp | 20
-rw-r--r-- src/armnn/backends/RefWorkloads/RefConstantUint8Workload.cpp | 19
-rw-r--r-- src/armnn/backends/RefWorkloads/RefConstantUint8Workload.hpp | 20
-rw-r--r-- src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp | 30
-rw-r--r-- src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp | 38
-rw-r--r-- src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp | 30
-rw-r--r-- src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp | 38
-rw-r--r-- src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.cpp | 42
-rw-r--r-- src/armnn/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefFloorFloat32Workload.cpp | 29
-rw-r--r-- src/armnn/backends/RefWorkloads/RefFloorFloat32Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp | 37
-rw-r--r-- src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp | 60
-rw-r--r-- src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.cpp | 61
-rw-r--r-- src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefMergerFloat32Workload.cpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefMergerFloat32Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefMergerUint8Workload.cpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefMergerUint8Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.cpp | 28
-rw-r--r-- src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.cpp | 38
-rw-r--r-- src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp | 185
-rw-r--r-- src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp | 31
-rw-r--r-- src/armnn/backends/RefWorkloads/RefPermuteWorkload.hpp | 33
-rw-r--r-- src/armnn/backends/RefWorkloads/RefPooling2dFloat32Workload.cpp | 33
-rw-r--r-- src/armnn/backends/RefWorkloads/RefPooling2dFloat32Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefPooling2dUint8Workload.cpp | 37
-rw-r--r-- src/armnn/backends/RefWorkloads/RefPooling2dUint8Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefReshapeFloat32Workload.cpp | 27
-rw-r--r-- src/armnn/backends/RefWorkloads/RefReshapeFloat32Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefReshapeUint8Workload.cpp | 27
-rw-r--r-- src/armnn/backends/RefWorkloads/RefReshapeUint8Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefResizeBilinearFloat32Workload.cpp | 29
-rw-r--r-- src/armnn/backends/RefWorkloads/RefResizeBilinearFloat32Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefResizeBilinearUint8Workload.cpp | 33
-rw-r--r-- src/armnn/backends/RefWorkloads/RefResizeBilinearUint8Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefSoftmaxFloat32Workload.cpp | 26
-rw-r--r-- src/armnn/backends/RefWorkloads/RefSoftmaxFloat32Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefSoftmaxUint8Workload.cpp | 36
-rw-r--r-- src/armnn/backends/RefWorkloads/RefSoftmaxUint8Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefSplitterFloat32Workload.cpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefSplitterFloat32Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefSplitterUint8Workload.cpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefSplitterUint8Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefWorkloadUtils.hpp | 125
-rw-r--r-- src/armnn/backends/RefWorkloads/ResizeBilinear.cpp | 92
-rw-r--r-- src/armnn/backends/RefWorkloads/ResizeBilinear.hpp | 15
-rw-r--r-- src/armnn/backends/RefWorkloads/Softmax.cpp | 49
-rw-r--r-- src/armnn/backends/RefWorkloads/Softmax.hpp | 16
-rw-r--r-- src/armnn/backends/RefWorkloads/Splitter.hpp | 83
-rw-r--r-- src/armnn/backends/RefWorkloads/TensorBufferArrayView.hpp | 42
91 files changed, 3474 insertions, 0 deletions
diff --git a/src/armnn/backends/RefWorkloads/Activation.cpp b/src/armnn/backends/RefWorkloads/Activation.cpp
new file mode 100644
index 0000000000..ede283cbf9
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/Activation.cpp
@@ -0,0 +1,91 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "Activation.hpp"
+
+#include <boost/log/trivial.hpp>
+
+#include <cmath>
+
+namespace armnn
+{
+
+void Activation(const float* in,
+ float* out,
+ const TensorInfo& tensorInfo,
+ ActivationFunction function,
+ float a,
+ float b)
+{
+ for (size_t i = 0; i<tensorInfo.GetNumElements(); i++)
+ {
+ float input = in[i];
+ float output;
+
+ // compute the result of the activation function
+ switch (function)
+ {
+ case ActivationFunction::Linear:
+ {
+ output = a * input + b;
+ break;
+ }
+ case ActivationFunction::Sigmoid:
+ {
+ output = 1.f / (1.f + expf(-input));
+ break;
+ }
+ case ActivationFunction::ReLu:
+ {
+ output = std::max(0.f, input);
+ break;
+ }
+ case ActivationFunction::BoundedReLu:
+ {
+ output = std::min(a, std::max(b, input));
+ break;
+ }
+ case ActivationFunction::SoftReLu:
+ {
+ output = logf(1.0f + expf(input));
+ break;
+ }
+ case ActivationFunction::LeakyReLu:
+ {
+ output = input > 0.0f ? input : (input * a);
+ break;
+ }
+ case ActivationFunction::Abs:
+ {
+ output = input < 0 ? -input : input;
+ break;
+ }
+ case ActivationFunction::Sqrt:
+ {
+ output = sqrtf(input);
+ break;
+ }
+ case ActivationFunction::Square:
+ {
+ output = input * input;
+ break;
+ }
+ case ActivationFunction::TanH:
+ {
+ output = a * tanhf(b * input);
+ break;
+ }
+ default:
+ {
+ BOOST_LOG_TRIVIAL(error) << "Unsupported activation function";
+ return;
+ }
+ }
+
+ out[i] = output;
+ }
+}
+
+} //namespace armnn
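
The helper above applies one scalar activation per element, with a and b acting as the extra parameters some functions need (the slope/offset of Linear, the scales of TanH, the bounds of BoundedReLu). A rough usage sketch follows; the buffer contents, shape and the ActivationSketch name are illustrative assumptions, not part of this patch.

    #include "Activation.hpp"
    #include <armnn/Tensor.hpp>

    void ActivationSketch()
    {
        // Four arbitrary inputs; ReLu clamps the negative one to zero.
        const float in[4] = { -1.0f, 0.0f, 0.5f, 2.0f };
        float out[4];

        const armnn::TensorInfo info(armnn::TensorShape({ 4 }), armnn::DataType::Float32);

        // a and b are ignored by ReLu; they parameterise functions such as Linear, TanH and BoundedReLu.
        armnn::Activation(in, out, info, armnn::ActivationFunction::ReLu, 0.0f, 0.0f);
        // out is now { 0.0f, 0.0f, 0.5f, 2.0f }
    }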
diff --git a/src/armnn/backends/RefWorkloads/Activation.hpp b/src/armnn/backends/RefWorkloads/Activation.hpp
new file mode 100644
index 0000000000..874441c862
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/Activation.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <armnn/Tensor.hpp>
+#include <armnn/Types.hpp>
+
+namespace armnn
+{
+
+/// Performs the ActivationFunction elementwise on the inputs to give the outputs
+void Activation(const float* in,
+ float* out,
+ const TensorInfo& tensorInfo,
+ ActivationFunction function,
+ float a,
+ float b);
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/Addition.cpp b/src/armnn/backends/RefWorkloads/Addition.cpp
new file mode 100644
index 0000000000..c26f82ecc2
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/Addition.cpp
@@ -0,0 +1,44 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "Addition.hpp"
+#include "Broadcast.hpp"
+
+#include <functional>
+
+namespace armnn
+{
+
+namespace
+{
+
+void ElementwiseAddition(unsigned int numElements, const float* inData0, const float* inData1, float* outData)
+{
+ for (unsigned int i = 0; i < numElements; ++i)
+ {
+ outData[i] = inData0[i] + inData1[i];
+ }
+}
+
+} // namespace
+
+void Addition(const TensorShape& inShape0,
+ const TensorShape& inShape1,
+ const TensorShape& outShape,
+ const float* inData0,
+ const float* inData1,
+ float* outData)
+{
+ if (inShape0 == inShape1)
+ {
+ ElementwiseAddition(inShape0.GetNumElements(), inData0, inData1, outData);
+ }
+ else
+ {
+ BroadcastLoop(inShape0, inShape1, outShape).Unroll(std::plus<float>(), 0, inData0, inData1, outData);
+ }
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/Addition.hpp b/src/armnn/backends/RefWorkloads/Addition.hpp
new file mode 100644
index 0000000000..e62d63ec14
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/Addition.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+void Addition(const TensorShape& inShape0,
+ const TensorShape& inShape1,
+ const TensorShape& outShape,
+ const float* inData0,
+ const float* inData1,
+ float* outData);
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/BatchNormImpl.hpp b/src/armnn/backends/RefWorkloads/BatchNormImpl.hpp
new file mode 100644
index 0000000000..f40a277d17
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/BatchNormImpl.hpp
@@ -0,0 +1,56 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "RefWorkloadUtils.hpp"
+
+#include <armnn/Tensor.hpp>
+
+#include <cmath>
+
+namespace armnn
+{
+
+template<typename NormData>
+static void BatchNormImpl(NormData data,
+ const float* varIn,
+ const float* meanIn,
+ const float* gammaIn,
+ const float* betaIn,
+ float * outputData,
+ const float * inputData)
+{
+ const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]);
+ for (unsigned int c = 0; c < inputInfo0.GetShape()[1]; c++)
+ {
+ float var = varIn[c];
+ float mean = meanIn[c];
+ float gamma = gammaIn[c];
+ float beta = betaIn[c];
+
+ float mult = gamma / sqrtf(var + data.m_Parameters.m_Eps);
+ float add = beta - mult * mean;
+
+ for (unsigned int n = 0; n < inputInfo0.GetShape()[0]; n++)
+ {
+ for (unsigned int j = 0; j < inputInfo0.GetShape()[2]; j++)
+ {
+ for (unsigned int i = 0; i < inputInfo0.GetShape()[3]; i++)
+ {
+ unsigned int index = i +
+ j*inputInfo0.GetShape()[3] +
+ c*inputInfo0.GetShape()[3] * inputInfo0.GetShape()[2] +
+ n*inputInfo0.GetShape()[3] * inputInfo0.GetShape()[2]
+ * inputInfo0.GetShape()[1];
+
+ outputData[index] = mult * inputData[index] + add;
+ }
+ }
+ }
+ }
+}
+
+} //namespace armnn
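
BatchNormImpl folds the usual normalisation formula, gamma * (x - mean) / sqrt(var + eps) + beta, into a single multiply-add per element by hoisting the per-channel constants mult and add out of the spatial loops. A small stand-alone check of that algebra, with values chosen arbitrarily for illustration:

    #include <cassert>
    #include <cmath>

    void BatchNormFoldSketch()
    {
        const float x = 2.0f, mean = 1.0f, var = 4.0f, gamma = 0.5f, beta = 0.25f, eps = 1e-5f;

        // Folded form used above: mult * x + add.
        const float mult = gamma / std::sqrt(var + eps);
        const float add  = beta - mult * mean;

        // Textbook form: gamma * (x - mean) / sqrt(var + eps) + beta.
        const float reference = gamma * (x - mean) / std::sqrt(var + eps) + beta;

        assert(std::fabs((mult * x + add) - reference) < 1e-6f);
    }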
diff --git a/src/armnn/backends/RefWorkloads/Broadcast.cpp b/src/armnn/backends/RefWorkloads/Broadcast.cpp
new file mode 100644
index 0000000000..90ccb48616
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/Broadcast.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "Broadcast.hpp"
+
+namespace armnn
+{
+
+BroadcastLoop::BroadcastLoop(const TensorShape& inShape0, const TensorShape& inShape1, const TensorShape& outShape)
+: m_DimData(outShape.GetNumDimensions())
+{
+ const unsigned int numDims = GetNumDimensions();
+
+ unsigned int sIn0 = 1;
+ unsigned int sIn1 = 1;
+ unsigned int sOut = 1;
+
+ for (unsigned int j = numDims - 1, k = 0; k < numDims ; k++, j--)
+ {
+ m_DimData[j].m_DimSize = outShape[j];
+ m_DimData[j].m_Stride1 = (inShape0[j] > 1) ? sIn0 : 0;
+ m_DimData[j].m_Stride2 = (inShape1[j] > 1) ? sIn1 : 0;
+ m_DimData[j].m_StrideOut = sOut;
+
+ sIn0 *= inShape0[j];
+ sIn1 *= inShape1[j];
+ sOut *= outShape[j];
+ }
+}
+
+} // namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/Broadcast.hpp b/src/armnn/backends/RefWorkloads/Broadcast.hpp
new file mode 100644
index 0000000000..b65b57f7a1
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/Broadcast.hpp
@@ -0,0 +1,58 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <armnn/Tensor.hpp>
+
+#include <functional>
+
+namespace armnn
+{
+
+struct BroadcastLoop
+{
+ BroadcastLoop(const TensorShape& inShape0, const TensorShape& inShape1, const TensorShape& outShape);
+
+ unsigned int GetNumDimensions()
+ {
+ return static_cast<unsigned int>(m_DimData.size());
+ }
+
+ template <typename T0, typename T1, typename U, typename Func>
+ void Unroll(Func operationFunc,
+ unsigned int dimension,
+ const T0* inData0,
+ const T1* inData1,
+ U* outData)
+ {
+ if (dimension >= GetNumDimensions())
+ {
+ *outData = operationFunc(*inData0, *inData1);
+ return;
+ }
+
+ for (unsigned int i = 0; i < m_DimData[dimension].m_DimSize; i++)
+ {
+ Unroll(operationFunc, dimension + 1, inData0, inData1, outData);
+
+ inData0 += m_DimData[dimension].m_Stride1;
+ inData1 += m_DimData[dimension].m_Stride2;
+ outData += m_DimData[dimension].m_StrideOut;
+ }
+ }
+
+private:
+ // Struct to hold the dimension data
+ struct BroadcastDimensionData
+ {
+ unsigned int m_DimSize;
+ unsigned int m_StrideOut;
+ unsigned int m_Stride1;
+ unsigned int m_Stride2;
+ };
+
+ std::vector<BroadcastDimensionData> m_DimData;
+};
+
+} //namespace armnn
\ No newline at end of file
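
BroadcastLoop implements broadcasting by giving any dimension of size 1 a stride of 0, so the same element is re-read while the other operand and the output advance. A sketch of the loop used directly to add a [2,3] tensor and a [1,3] row vector; the shapes, data and the BroadcastSketch name are illustrative assumptions.

    #include "Broadcast.hpp"
    #include <functional>

    void BroadcastSketch()
    {
        const armnn::TensorShape shape0({ 2, 3 });   // full tensor
        const armnn::TensorShape shape1({ 1, 3 });   // row vector, broadcast along dimension 0
        const armnn::TensorShape outShape({ 2, 3 });

        const float in0[6] = { 1, 2, 3, 4, 5, 6 };
        const float in1[3] = { 10, 20, 30 };
        float out[6];

        // Dimension 0 of in1 has size 1, so its stride is 0 and the same row feeds both output rows.
        armnn::BroadcastLoop(shape0, shape1, outShape).Unroll(std::plus<float>(), 0, in0, in1, out);
        // out == { 11, 22, 33, 14, 25, 36 }
    }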
diff --git a/src/armnn/backends/RefWorkloads/ConvImpl.cpp b/src/armnn/backends/RefWorkloads/ConvImpl.cpp
new file mode 100644
index 0000000000..9ebadacddb
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/ConvImpl.cpp
@@ -0,0 +1,71 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ConvImpl.hpp"
+
+#include <boost/assert.hpp>
+
+#include <cmath>
+#include <limits>
+
+namespace armnn
+{
+
+QuantizedMultiplierSmallerThanOne::QuantizedMultiplierSmallerThanOne(float multiplier)
+{
+ BOOST_ASSERT(multiplier >= 0.0f && multiplier < 1.0f);
+ if (multiplier == 0.0f)
+ {
+ m_Multiplier = 0;
+ m_RightShift = 0;
+ }
+ else
+ {
+ const double q = std::frexp(multiplier, &m_RightShift);
+ m_RightShift = -m_RightShift;
+ int64_t qFixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
+ BOOST_ASSERT(qFixed <= (1ll << 31));
+ if (qFixed == (1ll << 31))
+ {
+ qFixed /= 2;
+ --m_RightShift;
+ }
+ BOOST_ASSERT(m_RightShift >= 0);
+ BOOST_ASSERT(qFixed <= std::numeric_limits<int32_t>::max());
+ m_Multiplier = static_cast<int32_t>(qFixed);
+ }
+}
+
+int32_t QuantizedMultiplierSmallerThanOne::operator*(int32_t rhs) const
+{
+ int32_t x = SaturatingRoundingDoublingHighMul(rhs, m_Multiplier);
+ return RoundingDivideByPOT(x, m_RightShift);
+}
+
+int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
+{
+ // Check for overflow
+ if (a == b && a == std::numeric_limits<int32_t>::min())
+ {
+ return std::numeric_limits<int32_t>::max();
+ }
+ int64_t a_64(a);
+ int64_t b_64(b);
+ int64_t ab_64 = a_64 * b_64;
+ int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
+ int32_t ab_x2_high32 = static_cast<std::int32_t>((ab_64 + nudge) / (1ll << 31));
+ return ab_x2_high32;
+}
+
+int32_t QuantizedMultiplierSmallerThanOne::RoundingDivideByPOT(int32_t x, int exponent)
+{
+ BOOST_ASSERT(exponent >= 0 && exponent <= 31);
+ int32_t mask = (1 << exponent) - 1;
+ int32_t remainder = x & mask;
+ int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
+ return (x >> exponent) + (remainder > threshold ? 1 : 0);
+}
+
+} //namespace armnn
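
QuantizedMultiplierSmallerThanOne decomposes the real-valued multiplier into a Q31 fixed-point mantissa and a right shift using std::frexp, then applies them with the same round-to-nearest behaviour as gemmlowp. A worked example, with the value 0.5f and the input 100 chosen purely so the arithmetic is easy to follow:

    #include "ConvImpl.hpp"
    #include <cassert>

    void QuantizedMultiplierSketch()
    {
        // 0.5 = 0.5 * 2^0, so the stored mantissa is round(0.5 * 2^31) = 2^30 and the right shift is 0.
        const armnn::QuantizedMultiplierSmallerThanOne half(0.5f);

        // SaturatingRoundingDoublingHighMul(100, 2^30) yields 50, and the final rounding shift is a no-op.
        assert((half * 100) == 50);
    }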
diff --git a/src/armnn/backends/RefWorkloads/ConvImpl.hpp b/src/armnn/backends/RefWorkloads/ConvImpl.hpp
new file mode 100644
index 0000000000..ecc5b14687
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/ConvImpl.hpp
@@ -0,0 +1,184 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "RefWorkloadUtils.hpp"
+
+#include <armnn/Tensor.hpp>
+
+#include <boost/assert.hpp>
+#include <boost/numeric/conversion/cast.hpp>
+
+#include <cmath>
+#include <limits>
+
+namespace armnn
+{
+
+/// Performs multiplication of an integer with a multiplier which is less than one,
+/// using quantized integer arithmetic which is consistent with AndroidNN's CPU executor.
+struct QuantizedMultiplierSmallerThanOne
+{
+public:
+ /// Constructs a QuantizedMultiplierSmallerThanOne which will multiply by the given multiplier.
+ /// This stores the appropriate integer quantities (derived from the given multiplier) for later use.
+ /// The implementation of this function is adapted from Android NN's QuantizeMultiplierSmallerThanOne().
+ QuantizedMultiplierSmallerThanOne(float multiplier);
+
+ /// The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne()
+ int32_t operator*(int32_t rhs) const;
+
+private:
+ /// The implementation of this function is adapted from gemmlowp's SaturatingRoundingDoublingHighMul()
+ static int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b);
+
+ /// The implementation of this function is adapted from gemmlowp's RoundingDivideByPOT()
+ static int32_t RoundingDivideByPOT(int32_t x, int exponent);
+
+ int32_t m_Multiplier;
+ int32_t m_RightShift;
+};
+
+/// an implementation shared by normal and depthwise convolution
+template<typename ConvData, typename InputType, typename BiasType, typename AccumulatorType>
+static void ConvImpl(ConvData data,
+ const InputType* inputData,
+ float inputScale,
+ int32_t inputOffset,
+ const InputType* filterData,
+ float filterScale,
+ int32_t filterOffset,
+ const BiasType* biasData,
+ InputType* outputData,
+ float outputScale,
+ int32_t outputOffset,
+ bool depthwise = false)
+{
+ const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]);
+ const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]);
+ const TensorInfo& filterInfo = data.m_Weight->GetTensorInfo();
+
+ unsigned int depthMult = depthwise ? filterInfo.GetShape()[0] : 1;
+ unsigned int channelsInput = filterInfo.GetShape()[1];
+ unsigned int channelsOutput = depthwise ? channelsInput * depthMult : filterInfo.GetShape()[0];
+
+ BOOST_ASSERT(data.m_Parameters.m_BiasEnabled == false || biasData != nullptr);
+
+ unsigned int batchSize = outputInfo0.GetShape()[0];
+ unsigned int heightOutput = outputInfo0.GetShape()[2];
+ unsigned int widthOutput = outputInfo0.GetShape()[3];
+ unsigned int heightInput = inputInfo0.GetShape()[2];
+ unsigned int widthInput = inputInfo0.GetShape()[3];
+
+ unsigned int heightFilter = filterInfo.GetShape()[2];
+ unsigned int widthFilter = filterInfo.GetShape()[3];
+
+ unsigned int paddingTop = data.m_Parameters.m_PadTop;
+ unsigned int paddingLeft = data.m_Parameters.m_PadLeft;
+ unsigned int hStride = data.m_Parameters.m_StrideY;
+ unsigned int xStride = data.m_Parameters.m_StrideX;
+
+ // the world's least efficient convolution
+ for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
+ {
+ for (unsigned int cOutput = 0; cOutput < channelsOutput; cOutput++)
+ {
+ for (unsigned int yOutput = 0; yOutput < heightOutput; yOutput++)
+ {
+ for (unsigned int xOutput = 0; xOutput < widthOutput; xOutput++)
+ {
+ // this loop goes over each output element
+ AccumulatorType sum = AccumulatorType();
+
+ // for depthwise, each output channel corresponds to exactly one input channel
+ // for normal, must loop over each input channel
+ for (unsigned int cInput = 0; cInput < (depthwise ? 1 : channelsInput); cInput++)
+ {
+ unsigned int depthwiseMultiplierIdx = 0;
+ if (depthwise)
+ {
+ cInput = cOutput / depthMult;
+ depthwiseMultiplierIdx = cOutput % depthMult;
+ }
+
+ for (unsigned int yFilter = 0; yFilter < heightFilter; yFilter++)
+ {
+ for (unsigned int xFilter = 0; xFilter < widthFilter; xFilter++)
+ {
+ // this loop goes over each input element for each output element
+
+ unsigned int filterIndex;
+
+                                // the filter layout depends on whether the convolution is depthwise, and so does the index
+ if (depthwise)
+ {
+ filterIndex = depthwiseMultiplierIdx * widthFilter * heightFilter * channelsInput +
+ cInput * widthFilter * heightFilter +
+ yFilter * widthFilter +
+ xFilter;
+ }
+ else
+ {
+ filterIndex = cOutput * widthFilter * heightFilter * channelsInput +
+ cInput * widthFilter * heightFilter +
+ yFilter * widthFilter +
+ xFilter;
+ }
+ AccumulatorType filterValue = filterData[filterIndex] -
+ boost::numeric_cast<AccumulatorType>(filterOffset);
+
+ unsigned int yInput = yOutput * hStride + yFilter;
+ unsigned int xInput = xOutput * xStride + xFilter;
+
+ AccumulatorType inputValue;
+
+ // check if we're in the padding
+ if (yInput < paddingTop || yInput >= heightInput + paddingTop ||
+ xInput < paddingLeft || xInput >= widthInput + paddingLeft )
+ {
+ inputValue = AccumulatorType();
+ }
+ else
+ {
+ inputValue = inputData[batchIdx * widthInput * heightInput * channelsInput +
+ widthInput * heightInput * cInput +
+ widthInput * (yInput - paddingTop) +
+ xInput - paddingLeft] -
+ boost::numeric_cast<AccumulatorType>(inputOffset);
+ }
+ sum += filterValue * inputValue;
+ }
+ }
+ }
+
+ if (data.m_Parameters.m_BiasEnabled)
+ {
+ sum += biasData[cOutput];
+ }
+
+ if (outputScale != 0.0f)
+ {
+ float multiplier = (inputScale * filterScale) / outputScale;
+ // Apply the multiplier to sum, but do so using some quantized arithmetic which is consistent
+ // with the AndroidNN CPU implementation. This should be (roughly) equivalent to:
+ // sum = std::round(multiplier * sum + outputOffset);
+ sum = boost::numeric_cast<AccumulatorType>(
+ QuantizedMultiplierSmallerThanOne(multiplier) * boost::numeric_cast<int32_t>(sum))
+ + boost::numeric_cast<AccumulatorType>(outputOffset);
+ sum = std::min<AccumulatorType>(std::max<AccumulatorType>(sum, 0), 255);
+ }
+
+ outputData[batchIdx * widthOutput * heightOutput * channelsOutput +
+ widthOutput * heightOutput * cOutput +
+ widthOutput * yOutput +
+ xOutput] = boost::numeric_cast<InputType>(sum);
+ }
+ }
+ }
+ }
+}
+
+} //namespace armnn
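
Every buffer access in ConvImpl flattens NCHW coordinates by hand with the same pattern. It is equivalent to a helper along these lines (NchwIndex is a hypothetical name introduced here purely for illustration):

    // Flat offset of element (n, c, y, x) in a dense NCHW buffer
    // with C channels, H rows and W columns.
    inline unsigned int NchwIndex(unsigned int n, unsigned int c, unsigned int y, unsigned int x,
                                  unsigned int channels, unsigned int height, unsigned int width)
    {
        return ((n * channels + c) * height + y) * width + x;
    }

For example, the final output store above writes to NchwIndex(batchIdx, cOutput, yOutput, xOutput, channelsOutput, heightOutput, widthOutput).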
diff --git a/src/armnn/backends/RefWorkloads/FullyConnected.cpp b/src/armnn/backends/RefWorkloads/FullyConnected.cpp
new file mode 100644
index 0000000000..8ba11d19c6
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/FullyConnected.cpp
@@ -0,0 +1,62 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "FullyConnected.hpp"
+
+#include <boost/assert.hpp>
+
+namespace armnn
+{
+
+void FullyConnected(const float* inputData,
+ float* outputData,
+ const TensorInfo& inputTensorInfo,
+ const TensorInfo& outputTensorInfo,
+ const float* weightData,
+ const float* biasData,
+ bool transposeWeights)
+{
+ unsigned int N = outputTensorInfo.GetShape()[1]; // Output Vector Size
+
+ BOOST_ASSERT(inputTensorInfo.GetNumDimensions() > 1); // Need some data
+
+ unsigned int K = 1; // Total number of activations in the input
+ for (unsigned int i = 1; i < inputTensorInfo.GetNumDimensions(); i++)
+ {
+ K *= inputTensorInfo.GetShape()[i];
+ }
+
+ for (unsigned int n = 0; n < inputTensorInfo.GetShape()[0]; n++)
+ {
+ for (unsigned int channelOutput = 0; channelOutput < N; channelOutput++)
+ {
+ float outval = 0.f;
+
+ for (unsigned int channelInput = 0; channelInput < K; channelInput++)
+ {
+ float weight;
+ if (transposeWeights)
+ {
+ weight = weightData[channelOutput * K + channelInput];
+ }
+ else
+ {
+ weight = weightData[channelInput * N + channelOutput];
+ }
+
+ outval += weight * inputData[n * K + channelInput];
+ }
+
+ if (biasData)
+ {
+ outval += biasData[channelOutput];
+ }
+
+ outputData[n * N + channelOutput] = outval;
+ }
+ }
+}
+
+} //namespace armnn
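
FullyConnected computes, for each batch entry, one dot product of the flattened input against a row (transposeWeights == true) or column (transposeWeights == false) of the weight matrix per output channel, plus an optional bias. A minimal sketch with one batch, two inputs and two outputs; all values and the FullyConnectedSketch name are illustrative.

    #include "FullyConnected.hpp"

    void FullyConnectedSketch()
    {
        const armnn::TensorInfo inputInfo(armnn::TensorShape({ 1, 2 }), armnn::DataType::Float32);
        const armnn::TensorInfo outputInfo(armnn::TensorShape({ 1, 2 }), armnn::DataType::Float32);

        const float input[2]   = { 1.0f, 2.0f };
        // With transposeWeights == true the layout is [outputChannel][inputChannel].
        const float weights[4] = { 1.0f, 0.0f,    // output 0 takes input 0
                                   0.0f, 1.0f };  // output 1 takes input 1
        const float bias[2]    = { 0.5f, -0.5f };
        float output[2];

        armnn::FullyConnected(input, output, inputInfo, outputInfo, weights, bias,
                              true /*transposeWeights*/);
        // output == { 1.5f, 1.5f }
    }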
diff --git a/src/armnn/backends/RefWorkloads/FullyConnected.hpp b/src/armnn/backends/RefWorkloads/FullyConnected.hpp
new file mode 100644
index 0000000000..9fa2456110
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/FullyConnected.hpp
@@ -0,0 +1,22 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+/// Performs a matrix multiplication and optionally adds a bias
+void FullyConnected(const float* inputData,
+ float* outputData,
+ const TensorInfo& inputTensorInfo,
+ const TensorInfo& outputTensorInfo,
+ const float* weightData,
+ const float* biasData,
+ bool transposeWeights);
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/Merger.hpp b/src/armnn/backends/RefWorkloads/Merger.hpp
new file mode 100644
index 0000000000..9695e457e2
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/Merger.hpp
@@ -0,0 +1,81 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "RefWorkloadUtils.hpp"
+
+#include "backends/WorkloadData.hpp"
+
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+template <typename DataType>
+void Merger(const MergerQueueDescriptor& data)
+{
+ const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]);
+
+ for (unsigned int index = 0 ; index < outputInfo0.GetNumElements(); ++index)
+ {
+ unsigned int indices[MaxNumOfTensorDimensions];
+
+ unsigned int indexRemainder = index;
+ unsigned int dimensionStride = outputInfo0.GetNumElements();
+
+ for (unsigned int i=0; i<outputInfo0.GetNumDimensions(); i++)
+ {
+ dimensionStride /= outputInfo0.GetShape()[i];
+ indices[i] = indexRemainder / dimensionStride; // use integer division to round down
+ indexRemainder -= indices[i] * dimensionStride;
+ }
+
+ for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx)
+ {
+ MergerQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
+
+ //split view extents are defined by the size of (the corresponding) input tensor
+ const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[viewIdx]);
+
+ // check all dimensions to see if this element is inside the given input view
+ bool insideView = true;
+ for (unsigned int i=0; i<inputInfo.GetNumDimensions(); i++)
+ {
+ if (indices[i] < view.m_Origin[i])
+ {
+ insideView = false;
+ }
+ if (indices[i] >= view.m_Origin[i] + inputInfo.GetShape()[i])
+ {
+ insideView = false;
+ }
+ }
+
+ if (insideView)
+ {
+ unsigned int inIndex = 0;
+ unsigned int dimensionStride = 1;
+
+ for (unsigned int i = inputInfo.GetNumDimensions(); i-- > 0;)
+ {
+ inIndex += dimensionStride * (indices[i] - view.m_Origin[i]);
+ dimensionStride *= inputInfo.GetShape()[i];
+ }
+
+ //we are within the view, copy input data to the output corresponding to this view
+ (GetOutputTensorData<DataType>(0, data))[index] =
+ (GetInputTensorData<DataType>(viewIdx, data))[inIndex];
+
+ //what should we do if input views overlap on the output tensor?
+                //we could signal an error, take the average, or something else...
+                //for now just stop after finding the first view (input) that matches.
+ break;
+ }
+ }
+ }
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/Multiplication.cpp b/src/armnn/backends/RefWorkloads/Multiplication.cpp
new file mode 100644
index 0000000000..7f558d83c5
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/Multiplication.cpp
@@ -0,0 +1,22 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "Multiplication.hpp"
+
+namespace armnn
+{
+
+void Multiplication(const float* in0,
+ const float* in1,
+ unsigned int numElements,
+ float* out)
+{
+ for (unsigned int i = 0; i < numElements; ++i)
+ {
+ out[i] = in0[i] * in1[i];
+ }
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/Multiplication.hpp b/src/armnn/backends/RefWorkloads/Multiplication.hpp
new file mode 100644
index 0000000000..d0b033e7ec
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/Multiplication.hpp
@@ -0,0 +1,16 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+namespace armnn
+{
+
+void Multiplication(const float* in0,
+ const float* in1,
+ unsigned int numElements,
+ float* out);
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/Pooling2d.cpp b/src/armnn/backends/RefWorkloads/Pooling2d.cpp
new file mode 100644
index 0000000000..6d15d8a436
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/Pooling2d.cpp
@@ -0,0 +1,241 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "Pooling2d.hpp"
+
+#include <armnn/Exceptions.hpp>
+#include <armnn/Types.hpp>
+
+#include <boost/numeric/conversion/cast.hpp>
+
+#include <limits>
+#include <algorithm>
+#include <functional>
+
+namespace
+{
+ using PoolingAlgorithm = armnn::PoolingAlgorithm;
+
+ float DefaultInitializer(PoolingAlgorithm algorithm)
+ {
+ switch (algorithm)
+ {
+ case PoolingAlgorithm::Max:
+ {
+ return std::numeric_limits<float>::lowest();
+ }
+ case PoolingAlgorithm::Average:
+ case PoolingAlgorithm::L2:
+ {
+ return 0.0f;
+ }
+ default:
+ {
+ throw armnn::InvalidArgumentException("Unsupported pooling algorithm");
+ }
+ }
+ }
+
+ using Accumulator = std::function<void(float & accu, float value)>;
+
+ Accumulator GetAccumulator(PoolingAlgorithm algorithm)
+ {
+ switch (algorithm)
+ {
+ case PoolingAlgorithm::Max:
+ {
+ return [](float & accu, float value) {
+ if (value > accu) {
+ accu = value;
+ }
+ };
+ }
+
+ case PoolingAlgorithm::Average:
+ {
+ return [](float & accu, float value) {
+ accu += value;
+ };
+ }
+
+ case PoolingAlgorithm::L2:
+ {
+ return [](float & accu, float value) {
+ accu += (value*value);
+ };
+ }
+
+ default:
+ {
+ throw armnn::InvalidArgumentException("Unsupported pooling algorithm");
+ }
+ }
+ }
+
+ using Executor = std::function<void(float & accumulated, float kernelSize)>;
+
+ Executor GetExecutor(PoolingAlgorithm algorithm)
+ {
+ switch (algorithm)
+ {
+ case PoolingAlgorithm::Max:
+ {
+ return [](float & accumulated, float kernelSize) {};
+ }
+
+ case PoolingAlgorithm::Average:
+ {
+ return [](float & accumulated, float kernelSize) {
+ accumulated /= kernelSize;
+ };
+ }
+
+ case PoolingAlgorithm::L2:
+ {
+ return [](float & accumulated, float kernelSize) {
+ accumulated = sqrtf(accumulated / kernelSize);
+ };
+ }
+
+ default:
+ {
+ throw armnn::InvalidArgumentException("Unsupported pooling algorithm");
+ }
+ }
+ }
+
+ bool OnPaddingOnly(int start, int end, int maxRange, int padding)
+ {
+ if (end <= 0 || start > (maxRange - padding))
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+
+ bool ClampRange(int & start, int & end, int maxRange)
+ {
+ if (start < 0 || end > maxRange)
+ {
+ start = std::min(std::max(start, 0), maxRange);
+ end = std::min(std::max(end, 0), maxRange);
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+}
+
+namespace armnn
+{
+
+void Pooling2d(const float* in,
+ float* out,
+ const TensorInfo& inputInfo,
+ const TensorInfo& outputInfo,
+ const Pooling2dDescriptor& params)
+{
+ const int batchSize = boost::numeric_cast<int>(outputInfo.GetShape()[0]);
+ const int channels = boost::numeric_cast<int>(outputInfo.GetShape()[1]);
+ const int heightOutput = boost::numeric_cast<int>(outputInfo.GetShape()[2]);
+ const int widthOutput = boost::numeric_cast<int>(outputInfo.GetShape()[3]);
+ const int heightInput = boost::numeric_cast<int>(inputInfo.GetShape()[2]);
+ const int widthInput = boost::numeric_cast<int>(inputInfo.GetShape()[3]);
+ const int padLeft = boost::numeric_cast<int>(params.m_PadLeft);
+ const int padRight = boost::numeric_cast<int>(params.m_PadRight);
+ const int padTop = boost::numeric_cast<int>(params.m_PadTop);
+ const int padBottom = boost::numeric_cast<int>(params.m_PadBottom);
+ const int strideX = boost::numeric_cast<int>(params.m_StrideX);
+ const int strideY = boost::numeric_cast<int>(params.m_StrideY);
+ const int poolHeight = boost::numeric_cast<int>(params.m_PoolHeight);
+ const int poolWidth = boost::numeric_cast<int>(params.m_PoolWidth);
+
+ float defaultInitializer = DefaultInitializer(params.m_PoolType);
+
+ Accumulator accumulate = GetAccumulator(params.m_PoolType);
+ Executor execute = GetExecutor(params.m_PoolType);
+
+ // Check supported padding methods outside the loop to simplify
+ // the inner loop
+ if (params.m_PaddingMethod != PaddingMethod::Exclude &&
+ params.m_PaddingMethod != PaddingMethod::IgnoreValue)
+ {
+ throw armnn::InvalidArgumentException("Unsupported padding type");
+ }
+
+ for (int n = 0; n < batchSize; n++)
+ {
+ for (int c = 0; c < channels; c++)
+ {
+ for (int yOutput = 0; yOutput < heightOutput; yOutput++)
+ {
+ for (int xOutput = 0; xOutput < widthOutput; xOutput++)
+ {
+ int hstart = (yOutput * strideY) - padTop;
+ int wstart = (xOutput * strideX) - padLeft;
+ int hend = hstart + poolHeight;
+ int wend = wstart + poolWidth;
+
+ // Clamp the pooling region inside the valid input area (which includes the padding).
+ // This is necessary because the final pooling in a row may overlap beyond the padding.
+                    hend = std::min(hend, heightInput + padBottom);
+                    wend = std::min(wend, widthInput + padRight);
+
+ float result = defaultInitializer;
+ float poolAreaSize = boost::numeric_cast<float>((hend - hstart) * (wend - wstart));
+
+                    // special case: if the pooling kernel lies entirely over the padding region
+                    //               (the padding is at least as large as the kernel and the kernel
+                    //               covers no real values), the result is initialized to zero by
+                    //               convention, since some value must be chosen and every value
+                    //               available is padding, which is ignored.
+ if (OnPaddingOnly(hstart, hend, heightInput, padBottom) ||
+ OnPaddingOnly(wstart, wend, widthInput, padRight))
+ {
+ result = 0.0f;
+ }
+
+ bool clamped = ClampRange(wstart, wend, widthInput);
+ clamped |= ClampRange(hstart, hend, heightInput);
+
+ if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude)
+ {
+                        // when the padding is excluded, the pooling is computed with a smaller
+                        // effective kernel, so the divisor is recalculated from the clamped region
+ poolAreaSize = boost::numeric_cast<float>((hend - hstart) * (wend - wstart));
+ }
+
+ for (auto yInput = hstart; yInput < hend; yInput++)
+ {
+ for (auto xInput = wstart; xInput < wend; xInput++)
+ {
+ float inval = in[n * widthInput * heightInput * channels +
+ c * widthInput * heightInput +
+ yInput * widthInput +
+ xInput];
+
+ accumulate(result, inval);
+ }
+ }
+
+ execute(result, poolAreaSize);
+
+ out[n * widthOutput * heightOutput * channels +
+ c * widthOutput * heightOutput +
+ yOutput * widthOutput +
+ xOutput] = result;
+ }
+ }
+ }
+ }
+}
+
+} //namespace armnn
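
Pooling2d dispatches on the descriptor's m_PoolType to pick an initial value, an accumulator and a final step (divide for Average, sqrt-of-mean for L2, nothing for Max), then walks each output position's receptive field. A minimal max-pooling sketch, reducing a 2x2 input to a single value; the shapes, values and the Pooling2dSketch name are illustrative assumptions.

    #include "Pooling2d.hpp"

    void Pooling2dSketch()
    {
        // 1 batch, 1 channel, 2x2 input max-pooled by a 2x2 kernel into a 1x1 output.
        const armnn::TensorInfo inputInfo(armnn::TensorShape({ 1, 1, 2, 2 }), armnn::DataType::Float32);
        const armnn::TensorInfo outputInfo(armnn::TensorShape({ 1, 1, 1, 1 }), armnn::DataType::Float32);

        const float in[4] = { 1.0f, 3.0f, 2.0f, 4.0f };
        float out[1];

        armnn::Pooling2dDescriptor desc;
        desc.m_PoolType      = armnn::PoolingAlgorithm::Max;
        desc.m_PoolWidth     = 2;
        desc.m_PoolHeight    = 2;
        desc.m_StrideX       = 2;
        desc.m_StrideY       = 2;
        desc.m_PadLeft       = 0;
        desc.m_PadRight      = 0;
        desc.m_PadTop        = 0;
        desc.m_PadBottom     = 0;
        desc.m_PaddingMethod = armnn::PaddingMethod::Exclude;

        armnn::Pooling2d(in, out, inputInfo, outputInfo, desc);
        // out[0] == 4.0f
    }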
diff --git a/src/armnn/backends/RefWorkloads/Pooling2d.hpp b/src/armnn/backends/RefWorkloads/Pooling2d.hpp
new file mode 100644
index 0000000000..f88b1a0a4e
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/Pooling2d.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <armnn/Descriptors.hpp>
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+/// Computes the Pooling2d operation
+void Pooling2d(const float* in,
+ float* out,
+ const TensorInfo& inputInfo,
+ const TensorInfo& outputInfo,
+ const Pooling2dDescriptor& params);
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefActivationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefActivationFloat32Workload.cpp
new file mode 100644
index 0000000000..f566759deb
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefActivationFloat32Workload.cpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefActivationFloat32Workload.hpp"
+
+#include "Activation.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefActivationFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefActivationFloat32Workload_Execute");
+
+ Activation(GetInputTensorDataFloat(0, m_Data),
+ GetOutputTensorDataFloat(0, m_Data),
+ GetTensorInfo(m_Data.m_Inputs[0]),
+ m_Data.m_Parameters.m_Function,
+ m_Data.m_Parameters.m_A,
+ m_Data.m_Parameters.m_B);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefActivationFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefActivationFloat32Workload.hpp
new file mode 100644
index 0000000000..d8bd216699
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefActivationFloat32Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+
+namespace armnn
+{
+
+class RefActivationFloat32Workload : public Float32Workload<ActivationQueueDescriptor>
+{
+public:
+ using Float32Workload<ActivationQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefActivationUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefActivationUint8Workload.cpp
new file mode 100644
index 0000000000..e8852f4bf8
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefActivationUint8Workload.cpp
@@ -0,0 +1,38 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefActivationUint8Workload.hpp"
+
+#include "Activation.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <vector>
+
+namespace armnn
+{
+
+void RefActivationUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefActivationUint8Workload_Execute");
+
+ const TensorInfo& tensorInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+
+ auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), tensorInfo);
+
+ std::vector<float> results(tensorInfo.GetNumElements());
+
+ Activation(dequant.data(),
+ results.data(),
+ tensorInfo,
+ m_Data.m_Parameters.m_Function,
+ m_Data.m_Parameters.m_A,
+ m_Data.m_Parameters.m_B);
+
+ Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), GetTensorInfo(m_Data.m_Outputs[0]));
+}
+
+} //namespace armnn
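
This workload shows the pattern shared by all the Uint8 reference workloads in this patch: dequantize the uint8 inputs to float with real = scale * (quantized - offset), run the float reference implementation, then requantize the results into the output tensor. A worked example of that affine mapping in isolation; the scale and offset values are illustrative, not taken from any particular tensor.

    #include <cassert>
    #include <cmath>
    #include <cstdint>

    void QuantizationRoundTripSketch()
    {
        const float   scale  = 0.1f;
        const int32_t offset = 128;

        // Dequantize: real = scale * (quantized - offset).
        const uint8_t q    = 150;
        const float   real = scale * (static_cast<int32_t>(q) - offset);   // ~2.2f

        // Requantize: quantized = round(real / scale) + offset, clamped to [0, 255] in practice.
        const int32_t requant = static_cast<int32_t>(std::round(real / scale)) + offset;
        assert(requant == 150);
    }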
diff --git a/src/armnn/backends/RefWorkloads/RefActivationUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefActivationUint8Workload.hpp
new file mode 100644
index 0000000000..51514d0646
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefActivationUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefActivationUint8Workload : public Uint8Workload<ActivationQueueDescriptor>
+{
+public:
+ using Uint8Workload<ActivationQueueDescriptor>::Uint8Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefAdditionFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefAdditionFloat32Workload.cpp
new file mode 100644
index 0000000000..e06d7f9295
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefAdditionFloat32Workload.cpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefAdditionFloat32Workload.hpp"
+
+#include "Addition.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefAdditionFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefAdditionFloat32Workload_Execute");
+
+ const TensorShape& inShape0 = GetTensorInfo(m_Data.m_Inputs[0]).GetShape();
+ const TensorShape& inShape1 = GetTensorInfo(m_Data.m_Inputs[1]).GetShape();
+ const TensorShape& outShape = GetTensorInfo(m_Data.m_Outputs[0]).GetShape();
+
+ const float* inData0 = GetInputTensorDataFloat(0, m_Data);
+ const float* inData1 = GetInputTensorDataFloat(1, m_Data);
+ float* outData = GetOutputTensorDataFloat(0, m_Data);
+
+ Addition(inShape0, inShape1, outShape, inData0, inData1, outData);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefAdditionFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefAdditionFloat32Workload.hpp
new file mode 100644
index 0000000000..e69ea28b28
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefAdditionFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefAdditionFloat32Workload : public Float32Workload<AdditionQueueDescriptor>
+{
+public:
+ using Float32Workload<AdditionQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefAdditionUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefAdditionUint8Workload.cpp
new file mode 100644
index 0000000000..fa2dfeefc0
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefAdditionUint8Workload.cpp
@@ -0,0 +1,41 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefAdditionUint8Workload.hpp"
+
+#include "Addition.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <vector>
+
+namespace armnn
+{
+
+void RefAdditionUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefAdditionUint8Workload_Execute");
+
+ const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]);
+ const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+ auto dequant0 = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo0);
+ auto dequant1 = Dequantize(GetInputTensorDataU8(1, m_Data), inputInfo1);
+
+ std::vector<float> results(outputInfo.GetNumElements());
+
+ Addition(inputInfo0.GetShape(),
+ inputInfo1.GetShape(),
+ outputInfo.GetShape(),
+ dequant0.data(),
+ dequant1.data(),
+ results.data());
+
+ Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefAdditionUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefAdditionUint8Workload.hpp
new file mode 100644
index 0000000000..0f5a23ef4d
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefAdditionUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefAdditionUint8Workload : public Uint8Workload<AdditionQueueDescriptor>
+{
+public:
+ using Uint8Workload<AdditionQueueDescriptor>::Uint8Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.cpp b/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.cpp
new file mode 100644
index 0000000000..50a198f011
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.cpp
@@ -0,0 +1,49 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefBaseConstantWorkload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+
+#include <armnn/Types.hpp>
+
+#include <boost/assert.hpp>
+
+#include <cstring>
+
+namespace armnn
+{
+
+template <armnn::DataType DataType>
+void RefBaseConstantWorkload<DataType>::Execute() const
+{
+ // Considering the reference backend independently, it could be possible to initialise the intermediate tensor
+ // created by the layer output handler at workload construction time, rather than at workload execution time.
+ // However, this is not an option for other backends (e.g. CL). For consistency, we prefer to align all
+ // implementations.
+ // A similar argument can be made about performing the memory copy in the first place (the layer output handler
+ // could have a non-owning reference to the layer output tensor managed by the const input layer); again, this is
+ // not an option for other backends, and the extra complexity required to make this work for the reference backend
+ // may not be worth the effort (skipping a memory copy in the first inference).
+ if (!m_RanOnce)
+ {
+ const ConstantQueueDescriptor& data = this->m_Data;
+
+ BOOST_ASSERT(data.m_LayerOutput != nullptr);
+
+ const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]);
+ BOOST_ASSERT(data.m_LayerOutput->GetTensorInfo().GetNumBytes() == outputInfo.GetNumBytes());
+
+ memcpy(GetOutputTensorData<void>(0, data), data.m_LayerOutput->GetConstTensor<void>(),
+ outputInfo.GetNumBytes());
+
+ m_RanOnce = true;
+ }
+}
+
+template class RefBaseConstantWorkload<DataType::Float32>;
+template class RefBaseConstantWorkload<DataType::QuantisedAsymm8>;
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.hpp b/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.hpp
new file mode 100644
index 0000000000..0ede46d9fb
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.hpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+#include <armnn/Types.hpp>
+
+namespace armnn
+{
+
+// Base class template providing an implementation of the Constant layer common to all data types
+template <armnn::DataType DataType>
+class RefBaseConstantWorkload : public TypedWorkload<ConstantQueueDescriptor, DataType>
+{
+public:
+ RefBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : TypedWorkload<ConstantQueueDescriptor, DataType>(descriptor, info)
+ , m_RanOnce(false)
+ {
+ }
+
+ virtual void Execute() const override;
+
+private:
+ mutable bool m_RanOnce;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp
new file mode 100644
index 0000000000..c421b0f212
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefBatchNormalizationFloat32Workload.hpp"
+
+#include "BatchNormImpl.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefBatchNormalizationFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationFloat32Workload_Execute");
+
+ const float* var = m_Data.m_Variance->GetConstTensor<float>();
+ const float* mean = m_Data.m_Mean->GetConstTensor<float>();
+ const float* gamma = m_Data.m_Gamma->GetConstTensor<float>();
+ const float* beta = m_Data.m_Beta->GetConstTensor<float>();
+
+ auto inputData = GetInputTensorDataFloat(0, m_Data);
+ auto outputData = GetOutputTensorDataFloat(0, m_Data);
+
+ BatchNormImpl(m_Data, var, mean, gamma, beta, outputData, inputData);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp
new file mode 100644
index 0000000000..cbcdadd749
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefBatchNormalizationFloat32Workload : public Float32Workload<BatchNormalizationQueueDescriptor>
+{
+public:
+ using Float32Workload<BatchNormalizationQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp
new file mode 100644
index 0000000000..8a48523765
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp
@@ -0,0 +1,40 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefBatchNormalizationUint8Workload.hpp"
+
+#include "BatchNormImpl.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <vector>
+
+namespace armnn
+{
+
+void RefBatchNormalizationUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationUint8Workload_Execute");
+
+ const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& varInfo = GetTensorInfo(m_Data.m_Variance);
+ const TensorInfo& meanInfo = GetTensorInfo(m_Data.m_Mean);
+ const TensorInfo& gammaInfo = GetTensorInfo(m_Data.m_Gamma);
+ const TensorInfo& betaInfo = GetTensorInfo(m_Data.m_Beta);
+ const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+ auto input = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo0);
+ auto var = Dequantize(m_Data.m_Variance->GetConstTensor<uint8_t>(), varInfo);
+ auto mean = Dequantize(m_Data.m_Mean->GetConstTensor<uint8_t>(), meanInfo);
+ auto gamma = Dequantize(m_Data.m_Gamma->GetConstTensor<uint8_t>(), gammaInfo);
+ auto beta = Dequantize(m_Data.m_Beta->GetConstTensor<uint8_t>(), betaInfo);
+
+ std::vector<float> results(outputInfo.GetNumElements());
+ BatchNormImpl(m_Data, var.data(), mean.data(), gamma.data(), beta.data(), results.data(), input.data());
+ Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp
new file mode 100644
index 0000000000..57fe995ba5
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefBatchNormalizationUint8Workload : public Uint8Workload<BatchNormalizationQueueDescriptor>
+{
+public:
+ using Uint8Workload<BatchNormalizationQueueDescriptor>::Uint8Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefConstantFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefConstantFloat32Workload.cpp
new file mode 100644
index 0000000000..0ed66013f6
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefConstantFloat32Workload.cpp
@@ -0,0 +1,19 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefConstantFloat32Workload.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefConstantFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConstantFloat32Workload_Execute");
+ RefBaseConstantWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefConstantFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefConstantFloat32Workload.hpp
new file mode 100644
index 0000000000..f0876a99bf
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefConstantFloat32Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "RefBaseConstantWorkload.hpp"
+
+namespace armnn
+{
+
+class RefConstantFloat32Workload : public RefBaseConstantWorkload<DataType::Float32>
+{
+public:
+ using RefBaseConstantWorkload<DataType::Float32>::RefBaseConstantWorkload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefConstantUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefConstantUint8Workload.cpp
new file mode 100644
index 0000000000..2a4a514ad8
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefConstantUint8Workload.cpp
@@ -0,0 +1,19 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefConstantUint8Workload.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefConstantUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConstantUint8Workload_Execute");
+ RefBaseConstantWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefConstantUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefConstantUint8Workload.hpp
new file mode 100644
index 0000000000..504737dade
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefConstantUint8Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "RefBaseConstantWorkload.hpp"
+
+namespace armnn
+{
+
+class RefConstantUint8Workload : public RefBaseConstantWorkload<DataType::QuantisedAsymm8>
+{
+public:
+ using RefBaseConstantWorkload<DataType::QuantisedAsymm8>::RefBaseConstantWorkload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp
new file mode 100644
index 0000000000..6e4cc69063
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefConvolution2dFloat32Workload.hpp"
+
+#include "ConvImpl.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefConvolution2dFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dFloat32Workload_Execute");
+
+ float* outputData = GetOutputTensorDataFloat(0, m_Data);
+ const float* inputData = GetInputTensorDataFloat(0, m_Data);
+ const float* weightData = m_Data.m_Weight->template GetConstTensor<float>();
+ const float* biasData = m_Data.m_Parameters.m_BiasEnabled ?
+ m_Data.m_Bias->template GetConstTensor<float>() : nullptr;
+
+ ConvImpl<armnn::Convolution2dQueueDescriptor, float, float, float>(
+ m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp
new file mode 100644
index 0000000000..514369c262
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefConvolution2dFloat32Workload : public Float32Workload<Convolution2dQueueDescriptor>
+{
+public:
+ using Float32Workload<Convolution2dQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp
new file mode 100644
index 0000000000..f390baa387
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp
@@ -0,0 +1,38 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefConvolution2dUint8Workload.hpp"
+
+#include "ConvImpl.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefConvolution2dUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dUint8Workload_Execute");
+
+ const uint8_t* inputData = GetInputTensorDataU8(0, m_Data);
+ const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const uint8_t* weightsData = m_Data.m_Weight->template GetConstTensor<uint8_t>();
+ const TensorInfo& weightsInfo = GetTensorInfo(m_Data.m_Weight);
+ const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ?
+ m_Data.m_Bias->template GetConstTensor<int32_t>() :
+ nullptr;
+ uint8_t* outputData = GetOutputTensorDataU8(0, m_Data);
+ const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+ ConvImpl<armnn::Convolution2dQueueDescriptor, uint8_t, int32_t, int32_t>(
+ m_Data,
+ inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(),
+ weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(),
+ biasData,
+ outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset());
+}
+
+} //namespace armnn
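Unlike most of the other uint8 workloads in this patch, the quantized convolutions do not dequantize into temporary buffers; they pass each tensor's quantization scale and offset straight into ConvImpl. Under the asymmetric scheme used by the Quantize/Dequantize helpers later in this patch, a stored uint8 value q represents the real value scale * (q - offset). The standalone sketch below shows that mapping for a single multiply-accumulate; the parameter values are made up for illustration, and the real ConvImpl handles rounding and requantization itself.

#include <cstdint>
#include <cstdio>

int main()
{
    // Hypothetical per-tensor quantization parameters, chosen for illustration only.
    const float inScale = 0.5f;   const int32_t inOffset = 10;
    const float wScale  = 0.25f;  const int32_t wOffset  = 128;

    const uint8_t in = 14;   // represents 0.5f  * (14  - 10)  = 2.0f
    const uint8_t w  = 132;  // represents 0.25f * (132 - 128) = 1.0f

    // One multiply-accumulate in the real-valued domain. A reference implementation can afford
    // to work in float like this; optimized kernels typically accumulate in int32 instead.
    const float product = inScale * static_cast<float>(in - inOffset) *
                          wScale  * static_cast<float>(w  - wOffset);

    std::printf("dequantized product = %f\n", product);   // expected: 2.0
}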
diff --git a/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp
new file mode 100644
index 0000000000..954a206463
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefConvolution2dUint8Workload : public Uint8Workload<Convolution2dQueueDescriptor>
+{
+public:
+ using Uint8Workload<Convolution2dQueueDescriptor>::Uint8Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp
new file mode 100644
index 0000000000..c631fecb66
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefDepthwiseConvolution2dFloat32Workload.hpp"
+
+#include "ConvImpl.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefDepthwiseConvolution2dFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dFloat32Workload_Execute");
+
+ float* outputData = GetOutputTensorDataFloat(0, m_Data);
+ const float* inputData = GetInputTensorDataFloat(0, m_Data);
+ const float* weightData = m_Data.m_Weight->template GetConstTensor<float>();
+ const float* biasData = m_Data.m_Parameters.m_BiasEnabled ?
+ m_Data.m_Bias->template GetConstTensor<float>() : nullptr;
+
+ ConvImpl<armnn::DepthwiseConvolution2dQueueDescriptor, float, float, float>
+ (m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, true);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp
new file mode 100644
index 0000000000..34e6524684
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefDepthwiseConvolution2dFloat32Workload : public Float32Workload<DepthwiseConvolution2dQueueDescriptor>
+{
+public:
+ using Float32Workload<DepthwiseConvolution2dQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp
new file mode 100644
index 0000000000..5a8fb13112
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp
@@ -0,0 +1,38 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefDepthwiseConvolution2dUint8Workload.hpp"
+
+#include "ConvImpl.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefDepthwiseConvolution2dUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dUint8Workload_Execute");
+
+ const uint8_t* inputData = GetInputTensorDataU8(0, m_Data);
+ const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const uint8_t* weightsData = m_Data.m_Weight->template GetConstTensor<uint8_t>();
+ const TensorInfo& weightsInfo = GetTensorInfo(m_Data.m_Weight);
+ const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ?
+ m_Data.m_Bias->template GetConstTensor<int32_t>() :
+ nullptr;
+ uint8_t* outputData = GetOutputTensorDataU8(0, m_Data);
+ const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+ ConvImpl<armnn::DepthwiseConvolution2dQueueDescriptor, uint8_t, int32_t, int32_t>(
+ m_Data,
+ inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(),
+ weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(),
+ biasData,
+ outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), true);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp
new file mode 100644
index 0000000000..bd9945f529
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefDepthwiseConvolution2dUint8Workload : public Uint8Workload<DepthwiseConvolution2dQueueDescriptor>
+{
+public:
+ using Uint8Workload<DepthwiseConvolution2dQueueDescriptor>::Uint8Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.cpp
new file mode 100644
index 0000000000..483fa7e00e
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.cpp
@@ -0,0 +1,42 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefFakeQuantizationFloat32Workload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <boost/numeric/conversion/cast.hpp>
+
+namespace armnn
+{
+
+void FakeQuantization(const float* inputData, float* outputData, uint32_t numElements, float min, float max)
+{
+ float scale = (max - min) / 255.f;
+ int32_t offset = boost::numeric_cast<int32_t>((-min * 255.f) / (max - min));
+
+ for (uint32_t i = 0; i < numElements; i++)
+ {
+ outputData[i] = static_cast<float>(armnn::Quantize<uint8_t>(inputData[i], scale, offset));
+ }
+}
+
+void RefFakeQuantizationFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFakeQuantizationFloat32Workload_Execute");
+
+ const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+
+ const float* inputData = GetInputTensorDataFloat(0, m_Data);
+ float* outputData = GetOutputTensorDataFloat(0, m_Data);
+ FakeQuantization(inputData, outputData, inputInfo.GetNumElements(),
+ m_Data.m_Parameters.m_Min,
+ m_Data.m_Parameters.m_Max);
+}
+
+} //namespace armnn
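FakeQuantization derives its quantization parameters from the descriptor's [min, max] range as scale = (max - min) / 255 and offset = -min * 255 / (max - min), then writes the resulting uint8 level back as a float (it does not rescale into the original range). A minimal standalone sketch of that derivation, assuming the usual clamp(round(x / scale) + offset) form for Quantize (the real helper lives elsewhere in armnn):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Simplified stand-in for armnn::Quantize<uint8_t>: q = clamp(round(x / scale) + offset, 0, 255).
uint8_t QuantizeU8(float x, float scale, int32_t offset)
{
    const int32_t q = static_cast<int32_t>(std::round(x / scale)) + offset;
    return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}

int main()
{
    const float min = -51.0f, max = 204.0f;             // a range of width 255 keeps the numbers exact
    const float scale = (max - min) / 255.0f;           // = 1.0
    const int32_t offset = static_cast<int32_t>((-min * 255.0f) / (max - min));  // = 51

    std::printf("0.0  -> %u\n", static_cast<unsigned>(QuantizeU8(0.0f, scale, offset)));   // 51
    std::printf("10.4 -> %u\n", static_cast<unsigned>(QuantizeU8(10.4f, scale, offset)));  // 61
    std::printf("min  -> %u\n", static_cast<unsigned>(QuantizeU8(min, scale, offset)));    // 0
    std::printf("max  -> %u\n", static_cast<unsigned>(QuantizeU8(max, scale, offset)));    // 255
}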
diff --git a/src/armnn/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.hpp
new file mode 100644
index 0000000000..7ad5272edb
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefFakeQuantizationFloat32Workload : public Float32Workload<FakeQuantizationQueueDescriptor>
+{
+public:
+ using Float32Workload<FakeQuantizationQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefFloorFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefFloorFloat32Workload.cpp
new file mode 100644
index 0000000000..4bc7ec4404
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefFloorFloat32Workload.cpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefFloorFloat32Workload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefFloorFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFloorFloat32Workload_Execute");
+
+ const float* const input = GetInputTensorDataFloat(0, m_Data);
+ float* const output = GetOutputTensorDataFloat(0, m_Data);
+
+ unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements();
+ for (unsigned int i = 0; i < numElements; ++i)
+ {
+ output[i] = floorf(input[i]);
+ }
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefFloorFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefFloorFloat32Workload.hpp
new file mode 100644
index 0000000000..1eb5e2ff7b
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefFloorFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefFloorFloat32Workload : public Float32Workload<FloorQueueDescriptor>
+{
+public:
+ using Float32Workload<FloorQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp
new file mode 100644
index 0000000000..6fe203e5f0
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp
@@ -0,0 +1,37 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefFullyConnectedFloat32Workload.hpp"
+
+#include "FullyConnected.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefFullyConnectedFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFullyConnectedFloat32Workload_Execute");
+
+ const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+ float* outputData = GetOutputTensorDataFloat(0, m_Data);
+ const float* inputData = GetInputTensorDataFloat(0, m_Data);
+ const float* weightData = m_Data.m_Weight->GetConstTensor<float>();
+ const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Data.m_Bias->GetConstTensor<float>() : nullptr;
+
+ FullyConnected(inputData,
+ outputData,
+ inputInfo,
+ outputInfo,
+ weightData,
+ biasData,
+ m_Data.m_Parameters.m_TransposeWeightMatrix);
+}
+
+} //namespace armnn
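The float path defers to the FullyConnected reference kernel declared in FullyConnected.hpp earlier in this patch. Functionally it is a per-batch matrix-vector product with an optional bias; m_TransposeWeightMatrix only tells the kernel which of the two weight storage orders it has been given. A minimal sketch, assuming weights laid out as [outputs][inputs] (the DenseForward helper below is illustrative, not the armnn kernel):

#include <cstddef>

// out[b][o] = bias[o] + sum_i in[b][i] * w[o][i], with weights stored as [numOutputs][numInputs].
void DenseForward(const float* in, float* out, const float* w, const float* bias,
                  std::size_t batch, std::size_t numInputs, std::size_t numOutputs)
{
    for (std::size_t b = 0; b < batch; ++b)
    {
        for (std::size_t o = 0; o < numOutputs; ++o)
        {
            float acc = bias != nullptr ? bias[o] : 0.0f;
            for (std::size_t i = 0; i < numInputs; ++i)
            {
                acc += in[b * numInputs + i] * w[o * numInputs + i];
            }
            out[b * numOutputs + o] = acc;
        }
    }
}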
diff --git a/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp
new file mode 100644
index 0000000000..cb835bd2ce
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefFullyConnectedFloat32Workload : public Float32Workload<FullyConnectedQueueDescriptor>
+{
+public:
+ using Float32Workload<FullyConnectedQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp
new file mode 100644
index 0000000000..0186d3f5e5
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp
@@ -0,0 +1,60 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefFullyConnectedUint8Workload.hpp"
+
+#include "FullyConnected.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <vector>
+
+namespace armnn
+{
+
+void RefFullyConnectedUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFullyConnectedUint8Workload_Execute");
+
+ const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+ const uint8_t* weightData = m_Data.m_Weight->GetConstTensor<uint8_t>();
+
+ auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo);
+
+ auto weight = Dequantize(weightData, m_Data.m_Weight->GetTensorInfo());
+
+ std::vector<float> results(inputInfo.GetNumElements());
+
+ if (m_Data.m_Parameters.m_BiasEnabled)
+ {
+ const int32_t* biasData = m_Data.m_Bias->GetConstTensor<int32_t>();
+ auto bias = Dequantize(biasData, m_Data.m_Bias->GetTensorInfo());
+
+ FullyConnected(dequant.data(),
+ results.data(),
+ inputInfo,
+ outputInfo,
+ weight.data(),
+ bias.data(),
+ m_Data.m_Parameters.m_TransposeWeightMatrix);
+ }
+ else
+ {
+ FullyConnected(dequant.data(),
+ results.data(),
+ inputInfo,
+ outputInfo,
+ weight.data(),
+ nullptr,
+ m_Data.m_Parameters.m_TransposeWeightMatrix);
+ }
+
+ Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo);
+}
+
+} //namespace armnn
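The uint8 fully connected workload shows the pattern shared by most of the quantized reference workloads in this patch (multiplication, pooling, resize bilinear and softmax do the same): dequantize the uint8 inputs into temporary float vectors, run the float kernel, then requantize into the uint8 output using the output tensor's scale and offset. A minimal sketch of that wrapper shape, with hypothetical names (QuantParams, DequantizeBuffer, QuantizeBuffer and the element-wise stand-in kernel are illustrative, not armnn API):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

struct QuantParams { float scale; int32_t offset; };   // hypothetical helper struct

std::vector<float> DequantizeBuffer(const uint8_t* q, std::size_t n, QuantParams p)
{
    std::vector<float> out(n);
    for (std::size_t i = 0; i < n; ++i) { out[i] = p.scale * (static_cast<int32_t>(q[i]) - p.offset); }
    return out;
}

void QuantizeBuffer(uint8_t* q, const float* x, std::size_t n, QuantParams p)
{
    for (std::size_t i = 0; i < n; ++i)
    {
        const int32_t v = static_cast<int32_t>(std::round(x[i] / p.scale)) + p.offset;
        q[i] = static_cast<uint8_t>(std::min(255, std::max(0, v)));
    }
}

// Usage: dequantize, run any float kernel (here an element-wise scale by 2), requantize.
void RunUint8Workload(const uint8_t* in, uint8_t* out, std::size_t n, QuantParams inP, QuantParams outP)
{
    std::vector<float> deq = DequantizeBuffer(in, n, inP);
    std::vector<float> results(n);
    for (std::size_t i = 0; i < n; ++i) { results[i] = deq[i] * 2.0f; }  // stand-in for the float kernel
    QuantizeBuffer(out, results.data(), n, outP);
}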
diff --git a/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp
new file mode 100644
index 0000000000..cd14ea85e0
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefFullyConnectedUint8Workload : public Uint8Workload<FullyConnectedQueueDescriptor>
+{
+public:
+ using Uint8Workload<FullyConnectedQueueDescriptor>::Uint8Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.cpp
new file mode 100644
index 0000000000..82c1ecd32e
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.cpp
@@ -0,0 +1,61 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefL2NormalizationFloat32Workload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+#include "TensorBufferArrayView.hpp"
+
+#include "Profiling.hpp"
+
+#include <cmath>
+
+namespace armnn
+{
+
+void RefL2NormalizationFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefL2NormalizationFloat32Workload_Execute");
+
+ const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+ TensorBufferArrayView<const float> input(inputInfo.GetShape(), GetInputTensorDataFloat(0, m_Data));
+ TensorBufferArrayView<float> output(outputInfo.GetShape(), GetOutputTensorDataFloat(0, m_Data));
+
+ const unsigned int batchSize = inputInfo.GetShape()[0];
+ const unsigned int depth = inputInfo.GetShape()[1];
+ const unsigned int rows = inputInfo.GetShape()[2];
+ const unsigned int cols = inputInfo.GetShape()[3];
+
+ for (unsigned int n = 0; n < batchSize; ++n)
+ {
+ for (unsigned int d = 0; d < depth; ++d)
+ {
+ for (unsigned int h = 0; h < rows; ++h)
+ {
+ for (unsigned int w = 0; w < cols; ++w)
+ {
+ float reduction = 0.0;
+ for (unsigned int c = 0; c < depth; ++c)
+ {
+ const float value = input.Get(n, c, h, w);
+ reduction += value * value;
+ }
+
+                        // Using std::max(reduction, epsilon) below would guard against division by 0.
+ // However, at the time of writing:
+ // - This is not supported by the ACL functions used to implement L2Normalization in the CL
+ // backend.
+ // - The reference semantics for this operator do not include this parameter.
+ const float scale = 1.0f / sqrtf(reduction);
+ output.Get(n, d, h, w) = input.Get(n, d, h, w) * scale;
+ }
+ }
+ }
+ }
+}
+
+} //namespace armnn
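For NCHW input this reduces, per element, to out(n, d, h, w) = in(n, d, h, w) / sqrt(sum over c of in(n, c, h, w)^2), i.e. each spatial position is normalized to unit L2 norm across the channel dimension. As the in-code comment notes, there is no epsilon term, so an all-zero channel column divides by zero. A tiny standalone check of the formula for one pixel with two channels:

#include <cmath>
#include <cstdio>

int main()
{
    const float c0 = 3.0f, c1 = 4.0f;                 // the two channel values at one (n, h, w)
    const float norm = std::sqrt(c0 * c0 + c1 * c1);  // = 5
    std::printf("%f %f\n", c0 / norm, c1 / norm);     // 0.6 0.8 -> unit L2 norm across channels
}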
diff --git a/src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.hpp
new file mode 100644
index 0000000000..a2420279f5
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefL2NormalizationFloat32Workload : public Float32Workload<L2NormalizationQueueDescriptor>
+{
+public:
+ using Float32Workload<L2NormalizationQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefMergerFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefMergerFloat32Workload.cpp
new file mode 100644
index 0000000000..41d3c05d4b
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefMergerFloat32Workload.cpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefMergerFloat32Workload.hpp"
+
+#include "Merger.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefMergerFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMergerFloat32Workload_Execute");
+ Merger<float>(m_Data);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefMergerFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefMergerFloat32Workload.hpp
new file mode 100644
index 0000000000..d894c2a2ca
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefMergerFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefMergerFloat32Workload : public Float32Workload<MergerQueueDescriptor>
+{
+public:
+ using Float32Workload<MergerQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefMergerUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefMergerUint8Workload.cpp
new file mode 100644
index 0000000000..3f4371b628
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefMergerUint8Workload.cpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefMergerUint8Workload.hpp"
+
+#include "Merger.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefMergerUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMergerUint8Workload_Execute");
+ Merger<uint8_t>(m_Data);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefMergerUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefMergerUint8Workload.hpp
new file mode 100644
index 0000000000..4c9bbcac50
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefMergerUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefMergerUint8Workload : public Uint8Workload<MergerQueueDescriptor>
+{
+public:
+ using Uint8Workload<MergerQueueDescriptor>::Uint8Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.cpp
new file mode 100644
index 0000000000..ed68b1f6db
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.cpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefMultiplicationFloat32Workload.hpp"
+
+#include "Multiplication.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefMultiplicationFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMultiplicationFloat32Workload_Execute");
+
+ const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]);
+
+ float* outputData = GetOutputTensorDataFloat(0, m_Data);
+ const float* inputData0 = GetInputTensorDataFloat(0, m_Data);
+ const float* inputData1 = GetInputTensorDataFloat(1, m_Data);
+ Multiplication(inputData0, inputData1, inputInfo0.GetNumElements(), outputData);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.hpp
new file mode 100644
index 0000000000..920d072836
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefMultiplicationFloat32Workload : public Float32Workload<MultiplicationQueueDescriptor>
+{
+public:
+ using Float32Workload<MultiplicationQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.cpp
new file mode 100644
index 0000000000..2e6f0e6c8b
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.cpp
@@ -0,0 +1,38 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefMultiplicationUint8Workload.hpp"
+
+#include "Multiplication.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <vector>
+
+namespace armnn
+{
+
+void RefMultiplicationUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMultiplicationUint8Workload_Execute");
+
+ const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]);
+ const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+ auto dequant0 = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo0);
+ auto dequant1 = Dequantize(GetInputTensorDataU8(1, m_Data), inputInfo1);
+
+ std::vector<float> results(outputInfo.GetNumElements());
+ Multiplication(dequant0.data(),
+ dequant1.data(),
+ inputInfo0.GetNumElements(),
+ results.data());
+
+ Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.hpp
new file mode 100644
index 0000000000..5da2e581eb
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefMultiplicationUint8Workload : public Uint8Workload<MultiplicationQueueDescriptor>
+{
+public:
+ using Uint8Workload<MultiplicationQueueDescriptor>::Uint8Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp
new file mode 100644
index 0000000000..c743207423
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp
@@ -0,0 +1,185 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefNormalizationFloat32Workload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <armnn/Tensor.hpp>
+
+#include <boost/log/trivial.hpp>
+#include <boost/numeric/conversion/cast.hpp>
+
+namespace armnn
+{
+
+// Helper function to compute "Within" normalization using Krizhevsky 2012: Local Brightness Normalization
+static void NormalizeWithinUingLbr(const float* inputData,
+ float* outputData,
+ const TensorShape& tensorShape,
+ uint32_t norm_size,
+ float alpha,
+ float beta,
+ float kappa)
+{
+ const unsigned int batchSize = tensorShape[0];
+ const unsigned int depth = tensorShape[1];
+ const unsigned int rows = tensorShape[2];
+ const unsigned int cols = tensorShape[3];
+
+    int radius = boost::numeric_cast<int>(norm_size / 2u); /* Assumes the window radius is norm_size / 2, rounded down. */
+
+ for (unsigned int n = 0; n < batchSize; n++)
+ {
+ for (unsigned int c = 0; c < depth; c++)
+ {
+ for (unsigned int h = 0; h < rows; h++)
+ {
+ for (unsigned int w = 0; w < cols; w++)
+ {
+ float accumulated_scale = 0.0;
+ for (int y = -radius; y <= radius; y++)
+ {
+ for (int x = -radius; x <= radius; x++)
+ {
+ int i = boost::numeric_cast<int>(w) + x;
+ int j = boost::numeric_cast<int>(h) + y;
+
+ if ((i < 0) || (i >= boost::numeric_cast<int>(cols)))
+ {
+ continue;
+ }
+
+ if ((j < 0) || (j >= boost::numeric_cast<int>(rows)))
+ {
+ continue;
+ }
+
+ float inval = inputData[n * cols * rows * depth +
+ c * cols * rows +
+ boost::numeric_cast<unsigned int>(j) * cols +
+ boost::numeric_cast<unsigned int>(i)];
+
+ accumulated_scale += inval*inval;
+ }
+ }
+ outputData[n * cols * rows * depth +
+ c * cols * rows +
+ h * cols +
+ w] = inputData[n * cols * rows * depth +
+ c * cols * rows +
+ h * cols +
+ w] / (powf((kappa + (accumulated_scale * alpha)), beta));
+ }
+ }
+ }
+ }
+}
+
+// Helper function to compute "Across" normalization using Krizhevsky 2012: Local Brightness Normalization
+void NormalizeAcrossUingLbr(const float* inputData,
+ float* outputData,
+ const TensorShape& tensorShape,
+ uint32_t norm_size,
+ float alpha,
+ float beta,
+ float kappa)
+{
+ const unsigned int batchSize = tensorShape[0];
+ const unsigned int depth = tensorShape[1];
+ const unsigned int rows = tensorShape[2];
+ const unsigned int cols = tensorShape[3];
+
+    int radius = boost::numeric_cast<int>(norm_size / 2u); /* Assumes the window radius is norm_size / 2, rounded down. */
+
+ for (unsigned int n = 0; n < batchSize; n++)
+ {
+ for (unsigned int c = 0; c < depth; c++)
+ {
+ for (unsigned int h = 0; h < rows; h++)
+ {
+ for (unsigned int w = 0; w < cols; w++)
+ {
+ float accumulated_scale = 0.0;
+ for (int z = -radius; z <= radius; z++)
+ {
+ int k = boost::numeric_cast<int>(c) + z;
+
+ if ((k < 0) || (k >= boost::numeric_cast<int>(depth)))
+ {
+ continue;
+ }
+
+ float inval = inputData[n * cols * rows * depth +
+ boost::numeric_cast<unsigned int>(k) * cols * rows +
+ h * cols +
+ w];
+
+ accumulated_scale += inval*inval;
+ }
+ float scale = kappa + (accumulated_scale * alpha);
+ scale = powf(scale, -beta);
+ outputData[n * cols * rows * depth +
+ c * cols * rows +
+ h * cols +
+ w] = scale *
+ inputData[n * cols * rows * depth +
+ c * cols * rows +
+ h * cols +
+ w];
+ }
+ }
+ }
+ }
+}
+
+void RefNormalizationFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefNormalizationFloat32Workload_Execute");
+
+ const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+
+ float* outputData = GetOutputTensorDataFloat(0, m_Data);
+ const float* inputData = GetInputTensorDataFloat(0, m_Data);
+
+ if (NormalizationAlgorithmMethod::LocalBrightness == m_Data.m_Parameters.m_NormMethodType)
+ {
+ if (NormalizationAlgorithmChannel::Within == m_Data.m_Parameters.m_NormChannelType)
+ {
+ NormalizeWithinUingLbr(inputData,
+ outputData,
+ inputInfo.GetShape(),
+ m_Data.m_Parameters.m_NormSize,
+ m_Data.m_Parameters.m_Alpha,
+ m_Data.m_Parameters.m_Beta,
+ m_Data.m_Parameters.m_K);
+ }
+ else if (NormalizationAlgorithmChannel::Across == m_Data.m_Parameters.m_NormChannelType)
+ {
+ NormalizeAcrossUingLbr(inputData,
+ outputData,
+ inputInfo.GetShape(),
+ m_Data.m_Parameters.m_NormSize,
+ m_Data.m_Parameters.m_Alpha,
+ m_Data.m_Parameters.m_Beta,
+ m_Data.m_Parameters.m_K);
+ }
+ else
+ {
+            BOOST_LOG_TRIVIAL(warning) << "Unsupported normalization channel type in RefNormalizationFloat32Workload";
+ return;
+ }
+ }
+ else
+ {
+        BOOST_LOG_TRIVIAL(warning) << "LCN method (Jarrett 2009: Local Contrast Normalization) not supported yet.";
+ return;
+ }
+}
+
+} //namespace armnn
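Both helpers implement local response normalization in the Krizhevsky 2012 form: each element is scaled by (kappa + alpha * S)^(-beta), where S is the sum of squares over a window of norm_size neighbouring elements, taken either within the same channel plane ("Within") or across neighbouring channels ("Across"), and out-of-range neighbours are simply skipped. A minimal 1-D sketch of the "Across" case (standalone, not the armnn code):

#include <cmath>
#include <cstdio>
#include <vector>

// Each element is divided by (kappa + alpha * sum of squares over a window of normSize channels)^beta.
std::vector<float> LrnAcross1D(const std::vector<float>& in, unsigned normSize,
                               float alpha, float beta, float kappa)
{
    const int radius = static_cast<int>(normSize / 2);
    std::vector<float> out(in.size());
    for (int c = 0; c < static_cast<int>(in.size()); ++c)
    {
        float sumSq = 0.0f;
        for (int z = -radius; z <= radius; ++z)
        {
            const int k = c + z;
            if (k < 0 || k >= static_cast<int>(in.size())) { continue; }  // skip out-of-range channels
            sumSq += in[k] * in[k];
        }
        out[c] = in[c] * std::pow(kappa + alpha * sumSq, -beta);
    }
    return out;
}

int main()
{
    const std::vector<float> result = LrnAcross1D({1.0f, 2.0f, 3.0f, 4.0f}, 3, 0.0001f, 0.75f, 1.0f);
    for (float v : result) { std::printf("%f ", v); }
    std::printf("\n");
}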
diff --git a/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.hpp
new file mode 100644
index 0000000000..6f4175ae35
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefNormalizationFloat32Workload : public Float32Workload<NormalizationQueueDescriptor>
+{
+public:
+ using Float32Workload<NormalizationQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp b/src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp
new file mode 100644
index 0000000000..b2bb8fbf3d
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefPermuteWorkload.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include <Permute.hpp>
+
+namespace armnn
+{
+
+template <armnn::DataType DataType>
+void RefPermuteWorkload<DataType>::Execute() const
+{
+ using T = ResolveType<DataType>;
+
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, GetName() + "_Execute");
+
+ const ITensorHandle* src = m_Data.m_Inputs[0];
+ const ITensorHandle* dst = m_Data.m_Outputs[0];
+ const PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
+
+ armnnUtils::Permute(GetTensorInfo(dst).GetShape(), mappings, GetConstCpuData<T>(src), GetCpuData<T>(dst));
+}
+
+template class RefPermuteWorkload<DataType::Float32>;
+template class RefPermuteWorkload<DataType::QuantisedAsymm8>;
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefPermuteWorkload.hpp b/src/armnn/backends/RefWorkloads/RefPermuteWorkload.hpp
new file mode 100644
index 0000000000..4ca1f38588
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefPermuteWorkload.hpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+
+#include <armnn/TypesUtils.hpp>
+
+namespace armnn
+{
+
+template <armnn::DataType DataType>
+class RefPermuteWorkload : public TypedWorkload<PermuteQueueDescriptor, DataType>
+{
+public:
+ static const std::string& GetName()
+ {
+ static const std::string name = std::string("RefPermute") + GetDataTypeName(DataType) + "Workload";
+ return name;
+ }
+
+ using TypedWorkload<PermuteQueueDescriptor, DataType>::m_Data;
+ using TypedWorkload<PermuteQueueDescriptor, DataType>::TypedWorkload;
+ void Execute() const override;
+};
+
+using RefPermuteFloat32Workload = RefPermuteWorkload<DataType::Float32>;
+using RefPermuteUint8Workload = RefPermuteWorkload<DataType::QuantisedAsymm8>;
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefPooling2dFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefPooling2dFloat32Workload.cpp
new file mode 100644
index 0000000000..030f96c892
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefPooling2dFloat32Workload.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefPooling2dFloat32Workload.hpp"
+
+#include "Pooling2d.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefPooling2dFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPooling2dFloat32Workload_Execute");
+
+ const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& outputInfo0 = GetTensorInfo(m_Data.m_Outputs[0]);
+
+ float* outputData = GetOutputTensorDataFloat(0, m_Data);
+ const float* inputData = GetInputTensorDataFloat(0, m_Data);
+
+ Pooling2d(inputData,
+ outputData,
+ inputInfo0,
+ outputInfo0,
+ m_Data.m_Parameters);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefPooling2dFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefPooling2dFloat32Workload.hpp
new file mode 100644
index 0000000000..598b365a17
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefPooling2dFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefPooling2dFloat32Workload : public Float32Workload<Pooling2dQueueDescriptor>
+{
+public:
+ using Float32Workload<Pooling2dQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefPooling2dUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefPooling2dUint8Workload.cpp
new file mode 100644
index 0000000000..7066fc910b
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefPooling2dUint8Workload.cpp
@@ -0,0 +1,37 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefPooling2dUint8Workload.hpp"
+
+#include "Pooling2d.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <vector>
+
+namespace armnn
+{
+
+void RefPooling2dUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPooling2dUint8Workload_Execute");
+
+ const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+ auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo);
+
+ std::vector<float> results(outputInfo.GetNumElements());
+ Pooling2d(dequant.data(),
+ results.data(),
+ inputInfo,
+ outputInfo,
+ m_Data.m_Parameters);
+
+ Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefPooling2dUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefPooling2dUint8Workload.hpp
new file mode 100644
index 0000000000..cbeca2c41d
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefPooling2dUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefPooling2dUint8Workload : public Uint8Workload<Pooling2dQueueDescriptor>
+{
+public:
+ using Uint8Workload<Pooling2dQueueDescriptor>::Uint8Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefReshapeFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefReshapeFloat32Workload.cpp
new file mode 100644
index 0000000000..3bf7b48622
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefReshapeFloat32Workload.cpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefReshapeFloat32Workload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <cstring>
+
+namespace armnn
+{
+
+void RefReshapeFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReshapeFloat32Workload_Execute");
+
+ void* output = GetOutputTensorData<void>(0, m_Data);
+ const void* input = GetInputTensorData<void>(0, m_Data);
+ unsigned int numBytes = GetTensorInfo(m_Data.m_Inputs[0]).GetNumBytes();
+ memcpy(output, input, numBytes);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefReshapeFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefReshapeFloat32Workload.hpp
new file mode 100644
index 0000000000..36fdf7f812
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefReshapeFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefReshapeFloat32Workload : public Float32Workload<ReshapeQueueDescriptor>
+{
+public:
+ using Float32Workload<ReshapeQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefReshapeUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefReshapeUint8Workload.cpp
new file mode 100644
index 0000000000..38742607cd
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefReshapeUint8Workload.cpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefReshapeUint8Workload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <cstring>
+
+namespace armnn
+{
+
+void RefReshapeUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReshapeUint8Workload_Execute");
+
+ void* output = GetOutputTensorData<void>(0, m_Data);
+ const void* input = GetInputTensorData<void>(0, m_Data);
+ unsigned int numBytes = GetTensorInfo(m_Data.m_Inputs[0]).GetNumBytes();
+ memcpy(output, input, numBytes);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefReshapeUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefReshapeUint8Workload.hpp
new file mode 100644
index 0000000000..38da277bd2
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefReshapeUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefReshapeUint8Workload : public Uint8Workload<ReshapeQueueDescriptor>
+{
+public:
+ using Uint8Workload<ReshapeQueueDescriptor>::Uint8Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefResizeBilinearFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefResizeBilinearFloat32Workload.cpp
new file mode 100644
index 0000000000..8ad7a76298
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefResizeBilinearFloat32Workload.cpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefResizeBilinearFloat32Workload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+#include "ResizeBilinear.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefResizeBilinearFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefResizeBilinearFloat32Workload_Execute");
+
+ const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+ ResizeBilinear(GetInputTensorDataFloat(0, m_Data),
+ inputInfo,
+ GetOutputTensorDataFloat(0, m_Data),
+ outputInfo);
+}
+
+} //namespace armnn
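The interpolation itself lives in ResizeBilinear.cpp at the end of this patch: each output pixel's top-left corner is projected into the input image, the surrounding 2x2 texels are fetched (clamped at the borders), and two horizontal lerps are blended vertically. A minimal standalone sketch of that scheme for a single-channel image (SampleBilinear is illustrative, not the armnn function):

#include <algorithm>
#include <cmath>
#include <cstdio>

inline float Lerp(float a, float b, float w) { return w * b + (1.f - w) * a; }

// Sample output pixel (x, y) of an outW x outH image from an inW x inH single-channel image,
// projecting the top-left corner of the output texel into the input (TensorFlow/Android NN convention).
float SampleBilinear(const float* img, unsigned inW, unsigned inH,
                     unsigned outW, unsigned outH, unsigned x, unsigned y)
{
    const float scaleX = static_cast<float>(inW) / outW;
    const float scaleY = static_cast<float>(inH) / outH;

    const float ix = x * scaleX, iy = y * scaleY;
    const unsigned x0 = static_cast<unsigned>(std::floor(ix));
    const unsigned y0 = static_cast<unsigned>(std::floor(iy));
    const unsigned x1 = std::min(x0 + 1, inW - 1);
    const unsigned y1 = std::min(y0 + 1, inH - 1);
    const float xw = ix - x0, yw = iy - y0;

    const float top    = Lerp(img[y0 * inW + x0], img[y0 * inW + x1], xw);
    const float bottom = Lerp(img[y1 * inW + x0], img[y1 * inW + x1], xw);
    return Lerp(top, bottom, yw);
}

int main()
{
    const float img[4] = { 0.f, 1.f,
                           2.f, 3.f };                          // 2x2 input
    std::printf("%f\n", SampleBilinear(img, 2, 2, 4, 4, 1, 1)); // upsampling to 4x4; expected 1.5
}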
diff --git a/src/armnn/backends/RefWorkloads/RefResizeBilinearFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefResizeBilinearFloat32Workload.hpp
new file mode 100644
index 0000000000..86e8693b91
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefResizeBilinearFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefResizeBilinearFloat32Workload : public Float32Workload<ResizeBilinearQueueDescriptor>
+{
+public:
+ using Float32Workload<ResizeBilinearQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefResizeBilinearUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefResizeBilinearUint8Workload.cpp
new file mode 100644
index 0000000000..dfa561db6d
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefResizeBilinearUint8Workload.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefResizeBilinearUint8Workload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+#include "ResizeBilinear.hpp"
+
+#include "Profiling.hpp"
+
+#include <vector>
+
+namespace armnn
+{
+
+void RefResizeBilinearUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefResizeBilinearUint8Workload_Execute");
+
+ const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+ auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo);
+
+ std::vector<float> results(outputInfo.GetNumElements());
+ ResizeBilinear(dequant.data(), inputInfo, results.data(), outputInfo);
+
+ Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefResizeBilinearUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefResizeBilinearUint8Workload.hpp
new file mode 100644
index 0000000000..f72fafda4f
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefResizeBilinearUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefResizeBilinearUint8Workload : public Uint8Workload<ResizeBilinearQueueDescriptor>
+{
+public:
+ using Uint8Workload<ResizeBilinearQueueDescriptor>::Uint8Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefSoftmaxFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefSoftmaxFloat32Workload.cpp
new file mode 100644
index 0000000000..590e514d3d
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefSoftmaxFloat32Workload.cpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefSoftmaxFloat32Workload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+#include "Softmax.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefSoftmaxFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSoftmaxFloat32Workload_Execute");
+
+ Softmax(GetInputTensorDataFloat(0, m_Data),
+ GetOutputTensorDataFloat(0, m_Data),
+ GetTensorInfo(m_Data.m_Inputs[0]),
+ m_Data.m_Parameters.m_Beta);
+}
+
+} //namespace armnn
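The Softmax kernel itself is declared in Softmax.hpp; the descriptor's m_Beta acts as a temperature applied to the inputs. The usual numerically stable formulation is out_i = exp(beta * (x_i - max_j x_j)) / sum_k exp(beta * (x_k - max_j x_j)); the sketch below shows that form for one row and may differ in detail from the armnn implementation:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

// Numerically stable softmax over one row, with the beta parameter the SoftmaxQueueDescriptor carries.
std::vector<float> Softmax1D(const std::vector<float>& x, float beta)
{
    const float maxVal = *std::max_element(x.begin(), x.end());
    std::vector<float> out(x.size());
    float sum = 0.0f;
    for (std::size_t i = 0; i < x.size(); ++i)
    {
        out[i] = std::exp(beta * (x[i] - maxVal));  // shift by the max so exp() cannot overflow
        sum += out[i];
    }
    for (float& v : out) { v /= sum; }
    return out;
}

int main()
{
    for (float v : Softmax1D({1.0f, 2.0f, 3.0f}, 1.0f)) { std::printf("%f ", v); }  // ~0.09 0.24 0.67
    std::printf("\n");
}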
diff --git a/src/armnn/backends/RefWorkloads/RefSoftmaxFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefSoftmaxFloat32Workload.hpp
new file mode 100644
index 0000000000..4d30f9fa3f
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefSoftmaxFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefSoftmaxFloat32Workload : public Float32Workload<SoftmaxQueueDescriptor>
+{
+public:
+ using Float32Workload<SoftmaxQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefSoftmaxUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefSoftmaxUint8Workload.cpp
new file mode 100644
index 0000000000..5ef4a6da92
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefSoftmaxUint8Workload.cpp
@@ -0,0 +1,36 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefSoftmaxUint8Workload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+#include "Softmax.hpp"
+
+#include "Profiling.hpp"
+
+#include <vector>
+
+namespace armnn
+{
+
+void RefSoftmaxUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSoftmaxUint8Workload_Execute");
+
+ const TensorInfo& tensorInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+
+ auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), tensorInfo);
+
+ std::vector<float> results(tensorInfo.GetNumElements());
+
+ Softmax(dequant.data(),
+ results.data(),
+ tensorInfo,
+ m_Data.m_Parameters.m_Beta);
+
+ Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), GetTensorInfo(m_Data.m_Outputs[0]));
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefSoftmaxUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefSoftmaxUint8Workload.hpp
new file mode 100644
index 0000000000..fadc764e0a
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefSoftmaxUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefSoftmaxUint8Workload : public Uint8Workload<SoftmaxQueueDescriptor>
+{
+public:
+ using Uint8Workload<SoftmaxQueueDescriptor>::Uint8Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefSplitterFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefSplitterFloat32Workload.cpp
new file mode 100644
index 0000000000..35ab4e22ef
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefSplitterFloat32Workload.cpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefSplitterFloat32Workload.hpp"
+
+#include "Splitter.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefSplitterFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSplitterFloat32Workload_Execute");
+ Splitter<float>(m_Data);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefSplitterFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefSplitterFloat32Workload.hpp
new file mode 100644
index 0000000000..722dde129c
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefSplitterFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefSplitterFloat32Workload : public Float32Workload<SplitterQueueDescriptor>
+{
+public:
+ using Float32Workload<SplitterQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefSplitterUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefSplitterUint8Workload.cpp
new file mode 100644
index 0000000000..522a4463dd
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefSplitterUint8Workload.cpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefSplitterUint8Workload.hpp"
+
+#include "Splitter.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefSplitterUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSplitterUint8Workload_Execute");
+ Splitter<uint8_t>(m_Data);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefSplitterUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefSplitterUint8Workload.hpp
new file mode 100644
index 0000000000..e28554951b
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefSplitterUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefSplitterUint8Workload : public Uint8Workload<SplitterQueueDescriptor>
+{
+public:
+ using Uint8Workload<SplitterQueueDescriptor>::Uint8Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefWorkloadUtils.hpp b/src/armnn/backends/RefWorkloads/RefWorkloadUtils.hpp
new file mode 100644
index 0000000000..088fe819e5
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefWorkloadUtils.hpp
@@ -0,0 +1,125 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/CpuTensorHandle.hpp"
+
+#include <armnn/Tensor.hpp>
+#include <armnn/Types.hpp>
+
+#include <boost/polymorphic_cast.hpp>
+
+namespace armnn
+{
+
+////////////////////////////////////////////
+/// float32 helpers
+////////////////////////////////////////////
+
+inline const TensorInfo& GetTensorInfo(const ITensorHandle* tensorHandle)
+{
+ // We know that reference workloads use CpuTensorHandles only, so this cast is legitimate.
+ const ConstCpuTensorHandle* cpuTensorHandle =
+ boost::polymorphic_downcast<const ConstCpuTensorHandle*>(tensorHandle);
+ return cpuTensorHandle->GetTensorInfo();
+}
+
+template <typename DataType>
+inline const DataType* GetConstCpuData(const ITensorHandle* tensorHandle)
+{
+ // We know that reference workloads use (Const)CpuTensorHandles only, so this cast is legitimate.
+ const ConstCpuTensorHandle* cpuTensorHandle =
+ boost::polymorphic_downcast<const ConstCpuTensorHandle*>(tensorHandle);
+ return cpuTensorHandle->GetConstTensor<DataType>();
+}
+
+template <typename DataType>
+inline DataType* GetCpuData(const ITensorHandle* tensorHandle)
+{
+ // We know that reference workloads use CpuTensorHandles only, so this cast is legitimate.
+ const CpuTensorHandle* cpuTensorHandle = boost::polymorphic_downcast<const CpuTensorHandle*>(tensorHandle);
+ return cpuTensorHandle->GetTensor<DataType>();
+}
+
+template <typename DataType, typename PayloadType>
+const DataType* GetInputTensorData(unsigned int idx, const PayloadType& data)
+{
+ const ITensorHandle* tensorHandle = data.m_Inputs[idx];
+ return GetConstCpuData<DataType>(tensorHandle);
+}
+
+template <typename DataType, typename PayloadType>
+DataType* GetOutputTensorData(unsigned int idx, const PayloadType& data)
+{
+ const ITensorHandle* tensorHandle = data.m_Outputs[idx];
+ return GetCpuData<DataType>(tensorHandle);
+}
+
+template <typename PayloadType>
+const float* GetInputTensorDataFloat(unsigned int idx, const PayloadType& data)
+{
+ return GetInputTensorData<float>(idx, data);
+}
+
+template <typename PayloadType>
+float* GetOutputTensorDataFloat(unsigned int idx, const PayloadType& data)
+{
+ return GetOutputTensorData<float>(idx, data);
+}
+
+////////////////////////////////////////////
+/// u8 helpers
+////////////////////////////////////////////
+
+inline const uint8_t* GetConstCpuU8Data(const ITensorHandle* tensorHandle)
+{
+ // We know that reference workloads use (Const)CpuTensorHandles only, so this cast is legitimate.
+ const ConstCpuTensorHandle* cpuTensorHandle =
+ boost::polymorphic_downcast<const ConstCpuTensorHandle*>(tensorHandle);
+ return cpuTensorHandle->GetConstTensor<uint8_t>();
+}
+
+inline uint8_t* GetCpuU8Data(const ITensorHandle* tensorHandle)
+{
+ // We know that reference workloads use CpuTensorHandles only, so this cast is legitimate.
+ const CpuTensorHandle* cpuTensorHandle = boost::polymorphic_downcast<const CpuTensorHandle*>(tensorHandle);
+ return cpuTensorHandle->GetTensor<uint8_t>();
+}
+
+template <typename PayloadType>
+const uint8_t* GetInputTensorDataU8(unsigned int idx, const PayloadType& data)
+{
+ const ITensorHandle* tensorHandle = data.m_Inputs[idx];
+ return GetConstCpuU8Data(tensorHandle);
+}
+
+template <typename PayloadType>
+uint8_t* GetOutputTensorDataU8(unsigned int idx, const PayloadType& data)
+{
+ const ITensorHandle* tensorHandle = data.m_Outputs[idx];
+ return GetCpuU8Data(tensorHandle);
+}
+
+template<typename T>
+std::vector<float> Dequantize(const T* quant, const TensorInfo& info)
+{
+ std::vector<float> ret(info.GetNumElements());
+ for (size_t i = 0; i < info.GetNumElements(); i++)
+ {
+ ret[i] = armnn::Dequantize(quant[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
+ }
+ return ret;
+}
+
+inline void Quantize(uint8_t* quant, const float* dequant, const TensorInfo& info)
+{
+ for (size_t i = 0; i < info.GetNumElements(); i++)
+ {
+ quant[i] = armnn::Quantize<uint8_t>(dequant[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
+ }
+}
+
+} //namespace armnn
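
For reference, the per-element arithmetic behind the Dequantize and Quantize helpers above is the usual affine scheme: real = scale * (quantised - offset) on the way in, and quantised = clamp(round(real / scale) + offset, 0, 255) on the way out for uint8_t. The standalone sketch below reproduces that round trip with made-up scale/offset values; DequantizeValue and QuantizeValue are illustrative names, and the real helpers defer to armnn::Dequantize/armnn::Quantize and take scale and offset from the TensorInfo.

// Standalone sketch of the affine quantisation round trip mirrored by the helpers above.
// Scale/offset values are made up for illustration; armnn reads them from TensorInfo.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

namespace
{

float DequantizeValue(uint8_t quantised, float scale, int32_t offset)
{
    return scale * (static_cast<int32_t>(quantised) - offset);
}

uint8_t QuantizeValue(float value, float scale, int32_t offset)
{
    const int32_t q = static_cast<int32_t>(std::round(value / scale)) + offset;
    return static_cast<uint8_t>(std::min(255, std::max(0, q)));   // clamp to the uint8 range
}

} // anonymous namespace

int main()
{
    const float scale = 0.05f;      // example quantisation scale
    const int32_t offset = 128;     // example zero point

    const std::vector<uint8_t> quantised = { 0, 100, 128, 200, 255 };
    for (uint8_t q : quantised)
    {
        const float real = DequantizeValue(q, scale, offset);
        const uint8_t back = QuantizeValue(real, scale, offset);
        std::printf("q=%3u  real=%+.3f  requantised=%3u\n",
                    static_cast<unsigned int>(q), real, static_cast<unsigned int>(back));
    }
    return 0;
}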
diff --git a/src/armnn/backends/RefWorkloads/ResizeBilinear.cpp b/src/armnn/backends/RefWorkloads/ResizeBilinear.cpp
new file mode 100644
index 0000000000..7b386ed467
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/ResizeBilinear.cpp
@@ -0,0 +1,92 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ResizeBilinear.hpp"
+
+#include "TensorBufferArrayView.hpp"
+
+#include <boost/numeric/conversion/cast.hpp>
+
+#include <cmath>
+#include <algorithm>
+
+namespace armnn
+{
+
+namespace
+{
+
+inline float Lerp(float a, float b, float w)
+{
+ return w * b + (1.f - w) * a;
+}
+
+} // anonymous namespace
+
+void ResizeBilinear(const float* in, const TensorInfo& inputInfo, float* out, const TensorInfo& outputInfo)
+{
+    // We follow the definition used by TensorFlow and AndroidNN: the top-left corner of a texel in the output
+    // image is projected into the input image to determine the interpolants and weights. Note that this
+    // yields different results from projecting the centre of the output texels.
+
+ const unsigned int batchSize = inputInfo.GetShape()[0];
+ const unsigned int channelCount = inputInfo.GetShape()[1];
+
+ const unsigned int inputHeight = inputInfo.GetShape()[2];
+ const unsigned int inputWidth = inputInfo.GetShape()[3];
+ const unsigned int outputHeight = outputInfo.GetShape()[2];
+ const unsigned int outputWidth = outputInfo.GetShape()[3];
+
+ // How much to scale pixel coordinates in the output image to get the corresponding pixel coordinates
+ // in the input image
+ const float scaleY = boost::numeric_cast<float>(inputHeight) / boost::numeric_cast<float>(outputHeight);
+ const float scaleX = boost::numeric_cast<float>(inputWidth) / boost::numeric_cast<float>(outputWidth);
+
+ TensorBufferArrayView<const float> input(inputInfo.GetShape(), in);
+ TensorBufferArrayView<float> output(outputInfo.GetShape(), out);
+
+ for (unsigned int n = 0; n < batchSize; ++n)
+ {
+ for (unsigned int c = 0; c < channelCount; ++c)
+ {
+ for (unsigned int y = 0; y < outputHeight; ++y)
+ {
+ // Corresponding real-valued height coordinate in input image
+ const float iy = boost::numeric_cast<float>(y) * scaleY;
+
+ // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation)
+ const float fiy = floorf(iy);
+ const unsigned int y0 = boost::numeric_cast<unsigned int>(fiy);
+
+ // Interpolation weight (range [0,1])
+ const float yw = iy - fiy;
+
+ for (unsigned int x = 0; x < outputWidth; ++x)
+ {
+ // Real-valued and discrete width coordinates in input image
+ const float ix = boost::numeric_cast<float>(x) * scaleX;
+ const float fix = floorf(ix);
+ const unsigned int x0 = boost::numeric_cast<unsigned int>(fix);
+
+ // Interpolation weight (range [0,1])
+ const float xw = ix - fix;
+
+ // Discrete width/height coordinates of texels below and to the right of (x0, y0)
+ const unsigned int x1 = std::min(x0 + 1, inputWidth - 1u);
+ const unsigned int y1 = std::min(y0 + 1, inputHeight - 1u);
+
+ // Interpolation
+ const float ly0 = Lerp(input.Get(n, c, y0, x0), input.Get(n, c, y0, x1), xw); // lerp along row y0
+ const float ly1 = Lerp(input.Get(n, c, y1, x0), input.Get(n, c, y1, x1), xw); // lerp along row y1
+ const float l = Lerp(ly0, ly1, yw);
+
+ output.Get(n, c, y, x) = l;
+ }
+ }
+ }
+ }
+}
+
+} //namespace armnn
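
The projection-and-lerp scheme used above can be exercised in isolation. The sketch below is a hypothetical single-channel variant on plain row-major buffers (no TensorBufferArrayView, no batch/channel loops); it follows the same top-left-corner projection, floor, clamp and two-stage lerp, so upscaling a 2x2 image to 4x4 reproduces the weights the workload would compute.

// Minimal single-channel bilinear resize mirroring the logic of ResizeBilinear above.
// Plain row-major vectors stand in for TensorBufferArrayView; the names are illustrative.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

std::vector<float> ResizeBilinearSingleChannel(const std::vector<float>& in,
                                               unsigned int inH, unsigned int inW,
                                               unsigned int outH, unsigned int outW)
{
    std::vector<float> out(outH * outW);
    const float scaleY = static_cast<float>(inH) / static_cast<float>(outH);
    const float scaleX = static_cast<float>(inW) / static_cast<float>(outW);

    auto lerp = [](float a, float b, float w) { return w * b + (1.f - w) * a; };

    for (unsigned int y = 0; y < outH; ++y)
    {
        const float iy = y * scaleY;                                   // project output row into input
        const unsigned int y0 = static_cast<unsigned int>(std::floor(iy));
        const unsigned int y1 = std::min(y0 + 1, inH - 1u);            // clamp at the bottom edge
        const float yw = iy - std::floor(iy);

        for (unsigned int x = 0; x < outW; ++x)
        {
            const float ix = x * scaleX;                               // project output column into input
            const unsigned int x0 = static_cast<unsigned int>(std::floor(ix));
            const unsigned int x1 = std::min(x0 + 1, inW - 1u);        // clamp at the right edge
            const float xw = ix - std::floor(ix);

            const float top    = lerp(in[y0 * inW + x0], in[y0 * inW + x1], xw);
            const float bottom = lerp(in[y1 * inW + x0], in[y1 * inW + x1], xw);
            out[y * outW + x] = lerp(top, bottom, yw);
        }
    }
    return out;
}

int main()
{
    // Upscale a 2x2 image to 4x4.
    const std::vector<float> in = { 0.f, 1.f,
                                    2.f, 3.f };
    const std::vector<float> out = ResizeBilinearSingleChannel(in, 2, 2, 4, 4);
    for (unsigned int y = 0; y < 4; ++y)
    {
        for (unsigned int x = 0; x < 4; ++x)
        {
            std::printf("%5.2f ", out[y * 4 + x]);
        }
        std::printf("\n");
    }
    return 0;
}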
diff --git a/src/armnn/backends/RefWorkloads/ResizeBilinear.hpp b/src/armnn/backends/RefWorkloads/ResizeBilinear.hpp
new file mode 100644
index 0000000000..50e8128d18
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/ResizeBilinear.hpp
@@ -0,0 +1,15 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+void ResizeBilinear(const float* in, const TensorInfo& inputInfo, float* out, const TensorInfo& outputInfo);
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/Softmax.cpp b/src/armnn/backends/RefWorkloads/Softmax.cpp
new file mode 100644
index 0000000000..58840e3076
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/Softmax.cpp
@@ -0,0 +1,49 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "Softmax.hpp"
+
+#include <cmath>
+#include <vector>
+
+namespace armnn
+{
+
+/// Computes the softmax function over the input tensor and writes the result to the output buffer; both share the shape given by tensorInfo
+void Softmax(const float* in, float* out, const TensorInfo& tensorInfo, float beta)
+{
+ unsigned int numChannels = tensorInfo.GetShape()[1];
+ for (unsigned int n = 0; n < tensorInfo.GetShape()[0]; n++)
+ {
+        // find the maximum value across the channels (subtracted below for numerical stability)
+ float max = in[n * numChannels];
+ for (unsigned int c = 1; c < numChannels; c++)
+ {
+ float val = in[n * numChannels + c];
+ if (val > max)
+ {
+ max = val;
+ }
+ }
+
+ // exponentiate all values and sum
+ std::vector<float> exponentials(numChannels);
+ float sum = 0.0f;
+ for (unsigned int c = 0; c < numChannels; c++)
+ {
+ float val = in[n * numChannels + c];
+ exponentials[c] = expf((val - max) * beta);
+ sum += exponentials[c];
+ }
+
+ // divide exponentials by sum to give outputs
+ for (unsigned int c = 0; c < numChannels; c++)
+ {
+ out[n * numChannels + c] = exponentials[c] / sum;
+ }
+ }
+}
+
+} //namespace armnn
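
The max-subtraction above keeps expf from overflowing for large inputs; it does not change the result, because softmax is invariant to adding a constant to every channel. A standalone single-row version of the same computation (SoftmaxRow is an illustrative name):

// Standalone single-row softmax mirroring the per-batch loop above,
// including the max-subtraction for numerical stability. Illustrative only.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

std::vector<float> SoftmaxRow(const std::vector<float>& in, float beta)
{
    const float max = *std::max_element(in.begin(), in.end());

    std::vector<float> out(in.size());
    float sum = 0.0f;
    for (std::size_t c = 0; c < in.size(); ++c)
    {
        out[c] = std::exp((in[c] - max) * beta);    // shifted exponentials
        sum += out[c];
    }
    for (float& v : out)
    {
        v /= sum;                                   // normalise so the row sums to 1
    }
    return out;
}

int main()
{
    const std::vector<float> logits = { 1.0f, 2.0f, 3.0f };
    for (float v : SoftmaxRow(logits, 1.0f))
    {
        std::printf("%.4f ", v);                    // ~0.0900 0.2447 0.6652
    }
    std::printf("\n");
    return 0;
}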
diff --git a/src/armnn/backends/RefWorkloads/Softmax.hpp b/src/armnn/backends/RefWorkloads/Softmax.hpp
new file mode 100644
index 0000000000..c508ab2b82
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/Softmax.hpp
@@ -0,0 +1,16 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+/// Computes the softmax function over the input tensor and writes the result to the output buffer; both share the shape given by tensorInfo
+void Softmax(const float* in, float* out, const TensorInfo& tensorInfo, float beta);
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/Splitter.hpp b/src/armnn/backends/RefWorkloads/Splitter.hpp
new file mode 100644
index 0000000000..67f6c100f9
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/Splitter.hpp
@@ -0,0 +1,83 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "RefWorkloadUtils.hpp"
+
+#include "backends/WorkloadData.hpp"
+
+#include <armnn/Tensor.hpp>
+
+#include <boost/assert.hpp>
+
+namespace armnn
+{
+
+template <typename DataType>
+void Splitter(const SplitterQueueDescriptor& data)
+{
+ const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]);
+
+ for (unsigned int index = 0; index < inputInfo0.GetNumElements(); ++index)
+ {
+ unsigned int indices[MaxNumOfTensorDimensions];
+
+ unsigned int indexRemainder = index;
+ unsigned int dimensionStride = inputInfo0.GetNumElements();
+
+        for (unsigned int i = 0; i < inputInfo0.GetNumDimensions(); i++)
+ {
+ dimensionStride /= inputInfo0.GetShape()[i];
+ indices[i] = indexRemainder / dimensionStride; // use integer division to round down
+ indexRemainder -= indices[i] * dimensionStride;
+ }
+
+ for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx)
+ {
+ SplitterQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
+
+            // Split view extents are defined by the size of (the corresponding) output tensor
+ const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[viewIdx]);
+
+ // check all dimensions to see if this element is inside the given input view
+ bool insideView = true;
+            for (unsigned int i = 0; i < outputInfo.GetNumDimensions(); i++)
+ {
+ if (indices[i] < view.m_Origin[i])
+ {
+ insideView = false;
+ }
+ if (indices[i] >= view.m_Origin[i] + outputInfo.GetShape()[i])
+ {
+ insideView = false;
+ }
+ }
+
+ if (insideView)
+ {
+ unsigned int outIndex = 0;
+                unsigned int outStride = 1;
+
+                for (unsigned int i = outputInfo.GetNumDimensions(); i-- > 0;)
+                {
+                    outIndex += outStride * (indices[i] - view.m_Origin[i]);
+                    outStride *= outputInfo.GetShape()[i];
+ }
+
+                    // We are inside the view; copy the input element to the output corresponding to this view
+ DataType* outputData = GetOutputTensorData<DataType>(viewIdx, data);
+ BOOST_ASSERT(outputData);
+
+ const DataType* inputData = GetInputTensorData<DataType>(0, data);
+ BOOST_ASSERT(inputData);
+
+ outputData[outIndex] = inputData[index];
+ }
+ }
+ }
+}
+
+} //namespace armnn
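
The heart of the Splitter template is index arithmetic: each flat input index is decomposed into per-dimension coordinates using decreasing strides, and, for every view that contains the element, recomposed into a flat output index relative to the view origin. The sketch below isolates that arithmetic for a single view; Decompose, Recompose and the shapes used are illustrative, and the real template additionally handles the tensor-handle access shown above.

// Standalone sketch of the Splitter index arithmetic for one view.
// Shapes, origins and the "view" here are illustrative only.
#include <cstddef>
#include <cstdio>
#include <vector>

// Decompose a flat index over 'shape' into per-dimension coordinates.
std::vector<unsigned int> Decompose(unsigned int index, const std::vector<unsigned int>& shape)
{
    unsigned int stride = 1;
    for (unsigned int d : shape) { stride *= d; }

    std::vector<unsigned int> coords(shape.size());
    for (std::size_t i = 0; i < shape.size(); ++i)
    {
        stride /= shape[i];
        coords[i] = index / stride;   // integer division rounds down
        index -= coords[i] * stride;
    }
    return coords;
}

// Recompose coordinates (relative to a view origin) into a flat index over 'viewShape'.
unsigned int Recompose(const std::vector<unsigned int>& coords,
                       const std::vector<unsigned int>& origin,
                       const std::vector<unsigned int>& viewShape)
{
    unsigned int index = 0;
    unsigned int stride = 1;
    for (std::size_t i = viewShape.size(); i-- > 0;)
    {
        index += stride * (coords[i] - origin[i]);
        stride *= viewShape[i];
    }
    return index;
}

int main()
{
    // A 1x4 input split into 1x2 views; this view starts at column 2.
    const std::vector<unsigned int> inputShape = { 1, 4 };
    const std::vector<unsigned int> viewOrigin = { 0, 2 };
    const std::vector<unsigned int> viewShape  = { 1, 2 };

    for (unsigned int flat = 0; flat < 4; ++flat)
    {
        const std::vector<unsigned int> coords = Decompose(flat, inputShape);
        const bool inside = coords[1] >= viewOrigin[1] && coords[1] < viewOrigin[1] + viewShape[1];
        if (inside)
        {
            std::printf("input %u -> view element %u\n", flat, Recompose(coords, viewOrigin, viewShape));
        }
    }
    return 0;
}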
diff --git a/src/armnn/backends/RefWorkloads/TensorBufferArrayView.hpp b/src/armnn/backends/RefWorkloads/TensorBufferArrayView.hpp
new file mode 100644
index 0000000000..3994c1f1de
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/TensorBufferArrayView.hpp
@@ -0,0 +1,44 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <armnn/Tensor.hpp>
+
+#include <boost/assert.hpp>
+
+namespace armnn
+{
+
+// Utility class providing access to raw tensor memory based on indices along each dimension
+template <typename DataType>
+class TensorBufferArrayView
+{
+public:
+ TensorBufferArrayView(const TensorShape& shape, DataType* data)
+ : m_Shape(shape)
+ , m_Data(data)
+ {
+ }
+
+ DataType& Get(unsigned int b, unsigned int c, unsigned int h, unsigned int w) const
+ {
+ BOOST_ASSERT( b < m_Shape[0] || (m_Shape[0] == 0 && b == 0) );
+ BOOST_ASSERT( c < m_Shape[1] || (m_Shape[1] == 0 && c == 0) );
+ BOOST_ASSERT( h < m_Shape[2] || (m_Shape[2] == 0 && h == 0) );
+ BOOST_ASSERT( w < m_Shape[3] || (m_Shape[3] == 0 && w == 0) );
+
+ return m_Data[b * m_Shape[1] * m_Shape[2] * m_Shape[3]
+ + c * m_Shape[2] * m_Shape[3]
+ + h * m_Shape[3]
+ + w];
+ }
+
+private:
+ const TensorShape m_Shape;
+ DataType* m_Data;
+};
+
+} //namespace armnn
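
Get above is the standard NCHW flat-index computation, index = ((b*C + c)*H + h)*W + w, written with explicit per-dimension strides. A small self-contained illustration (NchwIndex and the shape are made up for the example):

// Standalone illustration of the NCHW indexing performed by TensorBufferArrayView::Get.
// The shape and data here are made up for the example.
#include <cstddef>
#include <cstdio>
#include <vector>

unsigned int NchwIndex(unsigned int b, unsigned int c, unsigned int h, unsigned int w,
                       unsigned int C, unsigned int H, unsigned int W)
{
    // Equivalent to b*C*H*W + c*H*W + h*W + w, as in the class above.
    return ((b * C + c) * H + h) * W + w;
}

int main()
{
    const unsigned int N = 1, C = 2, H = 2, W = 3;
    std::vector<float> data(N * C * H * W);
    for (std::size_t i = 0; i < data.size(); ++i)
    {
        data[i] = static_cast<float>(i);
    }

    // Element at batch 0, channel 1, row 1, column 2 lives at flat index 11.
    const unsigned int idx = NchwIndex(0, 1, 1, 2, C, H, W);
    std::printf("flat index = %u, value = %.1f\n", idx, data[idx]);
    return 0;
}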