Diffstat (limited to 'src/armnn/backends/RefWorkloads')
-rw-r--r--  src/armnn/backends/RefWorkloads/Activation.cpp | 2
-rw-r--r--  src/armnn/backends/RefWorkloads/Activation.hpp | 2
-rw-r--r--  src/armnn/backends/RefWorkloads/Broadcast.hpp | 2
-rw-r--r--  src/armnn/backends/RefWorkloads/ConvImpl.cpp | 2
-rw-r--r--  src/armnn/backends/RefWorkloads/ConvImpl.hpp | 26
-rw-r--r--  src/armnn/backends/RefWorkloads/FullyConnected.cpp | 6
-rw-r--r--  src/armnn/backends/RefWorkloads/FullyConnected.hpp | 2
-rw-r--r--  src/armnn/backends/RefWorkloads/Merger.hpp | 14
-rw-r--r--  src/armnn/backends/RefWorkloads/Pooling2d.cpp | 8
-rw-r--r--  src/armnn/backends/RefWorkloads/Pooling2d.hpp | 2
-rw-r--r--  src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.hpp | 2
-rw-r--r--  src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp | 15
-rw-r--r--  src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp | 9
-rw-r--r--  src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp | 23
-rw-r--r--  src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp | 9
-rw-r--r--  src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.cpp | 25
-rw-r--r--  src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.hpp | 21
-rw-r--r--  src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.cpp | 29
-rw-r--r--  src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.hpp | 21
-rw-r--r--  src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp | 13
-rw-r--r--  src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp | 8
-rw-r--r--  src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp | 15
-rw-r--r--  src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp | 9
-rw-r--r--  src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp | 13
-rw-r--r--  src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp | 8
-rw-r--r--  src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp | 16
-rw-r--r--  src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp | 7
-rw-r--r--  src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp | 10
-rw-r--r--  src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp | 7
-rw-r--r--  src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp | 16
-rw-r--r--  src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp | 7
-rw-r--r--  src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.cpp | 16
-rw-r--r--  src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.hpp | 21
-rw-r--r--  src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp | 4
-rw-r--r--  src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp | 1
-rw-r--r--  src/armnn/backends/RefWorkloads/RefWorkloadUtils.hpp | 13
-rw-r--r--  src/armnn/backends/RefWorkloads/ResizeBilinear.cpp | 22
-rw-r--r--  src/armnn/backends/RefWorkloads/Softmax.cpp | 8
-rw-r--r--  src/armnn/backends/RefWorkloads/Softmax.hpp | 2
-rw-r--r--  src/armnn/backends/RefWorkloads/Splitter.hpp | 8
-rw-r--r--  src/armnn/backends/RefWorkloads/TensorBufferArrayView.hpp | 2
41 files changed, 348 insertions, 98 deletions
diff --git a/src/armnn/backends/RefWorkloads/Activation.cpp b/src/armnn/backends/RefWorkloads/Activation.cpp
index ede283cbf9..fdb6091ad7 100644
--- a/src/armnn/backends/RefWorkloads/Activation.cpp
+++ b/src/armnn/backends/RefWorkloads/Activation.cpp
@@ -24,7 +24,7 @@ void Activation(const float* in,
float input = in[i];
float output;
- // compute the result of the activation function
+ // Compute the result of the activation function.
switch (function)
{
case ActivationFunction::Linear:
diff --git a/src/armnn/backends/RefWorkloads/Activation.hpp b/src/armnn/backends/RefWorkloads/Activation.hpp
index 874441c862..4ee604b462 100644
--- a/src/armnn/backends/RefWorkloads/Activation.hpp
+++ b/src/armnn/backends/RefWorkloads/Activation.hpp
@@ -9,7 +9,7 @@
namespace armnn
{
-/// Performs the ActivationFunction elementwise on the inputs to give the outputs
+/// Performs the ActivationFunction elementwise on the inputs to give the outputs.
void Activation(const float* in,
float* out,
const TensorInfo& tensorInfo,
diff --git a/src/armnn/backends/RefWorkloads/Broadcast.hpp b/src/armnn/backends/RefWorkloads/Broadcast.hpp
index b65b57f7a1..bdf03f2a16 100644
--- a/src/armnn/backends/RefWorkloads/Broadcast.hpp
+++ b/src/armnn/backends/RefWorkloads/Broadcast.hpp
@@ -43,7 +43,7 @@ struct BroadcastLoop
}
private:
- // Struct to hold the dimension data
+ // Struct to hold the dimension data.
struct BroadcastDimensionData
{
unsigned int m_DimSize;
diff --git a/src/armnn/backends/RefWorkloads/ConvImpl.cpp b/src/armnn/backends/RefWorkloads/ConvImpl.cpp
index 9ebadacddb..3dcd344101 100644
--- a/src/armnn/backends/RefWorkloads/ConvImpl.cpp
+++ b/src/armnn/backends/RefWorkloads/ConvImpl.cpp
@@ -46,7 +46,7 @@ int32_t QuantizedMultiplierSmallerThanOne::operator*(int32_t rhs) const
int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
{
- // Check for overflow
+ // Check for overflow.
if (a == b && a == std::numeric_limits<int32_t>::min())
{
return std::numeric_limits<int32_t>::max();
diff --git a/src/armnn/backends/RefWorkloads/ConvImpl.hpp b/src/armnn/backends/RefWorkloads/ConvImpl.hpp
index 8b66b0b7d2..b7d5d17a8d 100644
--- a/src/armnn/backends/RefWorkloads/ConvImpl.hpp
+++ b/src/armnn/backends/RefWorkloads/ConvImpl.hpp
@@ -18,7 +18,7 @@
namespace armnn
{
-/// Performs multiplication of a integer with a multiplier which is less than one,
+/// Performs multiplication of an integer with a multiplier which is less than one,
/// using quantized integer arithmetic which is consistent with AndroidNN's CPU executor.
struct QuantizedMultiplierSmallerThanOne
{
@@ -28,21 +28,21 @@ public:
/// The implementation of this function is adapted from Android NN's QuantizeMultiplierSmallerThanOne().
QuantizedMultiplierSmallerThanOne(float multiplier);
- /// The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne()
+ /// The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne().
int32_t operator*(int32_t rhs) const;
private:
- /// The implementation of this function is adapted from gemmlowp's SaturatingRoundingDoublingHighMul()
+ /// The implementation of this function is adapted from gemmlowp's SaturatingRoundingDoublingHighMul().
static int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b);
- /// The implementation of this function is adapted from gemmlowp's RoundingDivideByPOT()
+ /// The implementation of this function is adapted from gemmlowp's RoundingDivideByPOT().
static int32_t RoundingDivideByPOT(int32_t x, int exponent);
int32_t m_Multiplier;
int32_t m_RightShift;
};
-/// an implementation shared by normal and depthwise convolution
+/// An implementation shared by normal and depthwise convolution.
template<typename ConvData, typename InputType, typename BiasType, typename AccumulatorType>
static void ConvImpl(ConvData data,
const InputType* inputData,
@@ -55,6 +55,7 @@ static void ConvImpl(ConvData data,
InputType* outputData,
float outputScale,
int32_t outputOffset,
+ const TensorInfo& filterInfo,
bool depthwise = false)
{
if (data.m_Parameters.m_BiasEnabled && !biasData)
@@ -64,7 +65,6 @@ static void ConvImpl(ConvData data,
const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]);
const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]);
- const TensorInfo& filterInfo = data.m_Weight->GetTensorInfo();
unsigned int depthMult = depthwise ? filterInfo.GetShape()[0] : 1;
unsigned int channelsInput = filterInfo.GetShape()[1];
@@ -84,7 +84,7 @@ static void ConvImpl(ConvData data,
unsigned int hStride = data.m_Parameters.m_StrideY;
unsigned int xStride = data.m_Parameters.m_StrideX;
- // the world's least efficient convolution
+ // The world's least efficient convolution.
for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
{
for (unsigned int cOutput = 0; cOutput < channelsOutput; cOutput++)
@@ -93,11 +93,11 @@ static void ConvImpl(ConvData data,
{
for (unsigned int xOutput = 0; xOutput < widthOutput; xOutput++)
{
- // this loop goes over each output element
+ // This loop goes over each output element.
AccumulatorType sum = AccumulatorType();
- // for depthwise, each output channel corresponds to exactly one input channel
- // for normal, must loop over each input channel
+ // For depthwise, each output channel corresponds to exactly one input channel.
+ // For normal, must loop over each input channel.
for (unsigned int cInput = 0; cInput < (depthwise ? 1 : channelsInput); cInput++)
{
unsigned int depthwiseMultiplierIdx = 0;
@@ -111,11 +111,11 @@ static void ConvImpl(ConvData data,
{
for (unsigned int xFilter = 0; xFilter < widthFilter; xFilter++)
{
- // this loop goes over each input element for each output element
+ // This loop goes over each input element for each output element.
unsigned int filterIndex;
- // since dimensionality of kernel depends on depthwiseness, so does index
+ // Since dimensionality of kernel depends on depthwiseness, so does index.
if (depthwise)
{
filterIndex = depthwiseMultiplierIdx * widthFilter * heightFilter * channelsInput +
@@ -138,7 +138,7 @@ static void ConvImpl(ConvData data,
AccumulatorType inputValue;
- // check if we're in the padding
+ // Check if we're in the padding.
if (yInput < paddingTop || yInput >= heightInput + paddingTop ||
xInput < paddingLeft || xInput >= widthInput + paddingLeft )
{
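For reference, the fixed-point arithmetic described by the QuantizedMultiplierSmallerThanOne comments works roughly as follows: the real multiplier in [0, 1) is decomposed into a Q31 integer multiplier plus a right shift, then applied with a saturating doubling high-multiply and a rounding divide by a power of two. A minimal standalone sketch in the spirit of the gemmlowp/Android NN routines the comments name (the constants and the main() driver are illustrative, not Arm NN code):

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <limits>

// Decompose a multiplier in [0,1) into a Q31 fixed-point multiplier and a right shift.
static void QuantizeMultiplierSmallerThanOne(float multiplier,
                                             int32_t* quantizedMultiplier,
                                             int* rightShift)
{
    int exponent = 0;
    const double q = std::frexp(multiplier, &exponent);   // q in [0.5, 1)
    *rightShift = -exponent;
    auto qFixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
    if (qFixed == (1ll << 31))                             // handle rounding up to 1.0
    {
        qFixed /= 2;
        --*rightShift;
    }
    *quantizedMultiplier = static_cast<int32_t>(qFixed);
}

// Saturating doubling high multiply, in the style of gemmlowp's SaturatingRoundingDoublingHighMul.
static int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
{
    if (a == b && a == std::numeric_limits<int32_t>::min())
    {
        return std::numeric_limits<int32_t>::max();        // overflow case
    }
    const int64_t ab = static_cast<int64_t>(a) * b;
    const int32_t nudge = ab >= 0 ? (1 << 30) : (1 - (1 << 30));
    return static_cast<int32_t>((ab + nudge) / (1ll << 31));
}

// Rounding divide by a power of two, in the style of gemmlowp's RoundingDivideByPOT.
static int32_t RoundingDivideByPOT(int32_t x, int exponent)
{
    const int32_t mask = (1 << exponent) - 1;
    const int32_t remainder = x & mask;
    const int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
    return (x >> exponent) + (remainder > threshold ? 1 : 0);
}

int main()
{
    int32_t m = 0;
    int shift = 0;
    QuantizeMultiplierSmallerThanOne(0.3f, &m, &shift);
    // Apply the multiplier to an accumulator value: 1000 * 0.3 -> 300.
    const int32_t scaled = RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(1000, m), shift);
    std::printf("scaled = %d\n", scaled);
    return 0;
}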
diff --git a/src/armnn/backends/RefWorkloads/FullyConnected.cpp b/src/armnn/backends/RefWorkloads/FullyConnected.cpp
index 8ba11d19c6..1a8263b9a1 100644
--- a/src/armnn/backends/RefWorkloads/FullyConnected.cpp
+++ b/src/armnn/backends/RefWorkloads/FullyConnected.cpp
@@ -18,11 +18,11 @@ void FullyConnected(const float* inputData,
const float* biasData,
bool transposeWeights)
{
- unsigned int N = outputTensorInfo.GetShape()[1]; // Output Vector Size
+ unsigned int N = outputTensorInfo.GetShape()[1]; // Output vector size.
- BOOST_ASSERT(inputTensorInfo.GetNumDimensions() > 1); // Need some data
+ BOOST_ASSERT(inputTensorInfo.GetNumDimensions() > 1); // Needs some data.
- unsigned int K = 1; // Total number of activations in the input
+ unsigned int K = 1; // Total number of activations in the input.
for (unsigned int i = 1; i < inputTensorInfo.GetNumDimensions(); i++)
{
K *= inputTensorInfo.GetShape()[i];
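As the comments above note, N is the output vector size and K is the product of all non-batch input dimensions. A small sketch of that reduction, using a plain std::vector<unsigned int> in place of TensorInfo (illustrative only):

#include <cstddef>
#include <cstdio>
#include <vector>

// Product of all dimensions after the batch dimension, i.e. K in the code above.
unsigned int InputActivationCount(const std::vector<unsigned int>& shape)
{
    unsigned int k = 1;
    for (std::size_t i = 1; i < shape.size(); ++i)
    {
        k *= shape[i];
    }
    return k;
}

int main()
{
    // A [batch, channels, height, width] input of 1x3x4x4 gives K = 48.
    std::printf("K = %u\n", InputActivationCount({1u, 3u, 4u, 4u}));
    return 0;
}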
diff --git a/src/armnn/backends/RefWorkloads/FullyConnected.hpp b/src/armnn/backends/RefWorkloads/FullyConnected.hpp
index 9fa2456110..fa6f54a3ec 100644
--- a/src/armnn/backends/RefWorkloads/FullyConnected.hpp
+++ b/src/armnn/backends/RefWorkloads/FullyConnected.hpp
@@ -10,7 +10,7 @@
namespace armnn
{
-/// Performs a matrix multiplication and optionally adds a bias
+/// Performs a matrix multiplication and optionally adds a bias.
void FullyConnected(const float* inputData,
float* outputData,
const TensorInfo& inputTensorInfo,
diff --git a/src/armnn/backends/RefWorkloads/Merger.hpp b/src/armnn/backends/RefWorkloads/Merger.hpp
index 7d1bfab557..1294d05e08 100644
--- a/src/armnn/backends/RefWorkloads/Merger.hpp
+++ b/src/armnn/backends/RefWorkloads/Merger.hpp
@@ -29,7 +29,7 @@ void Merger(const MergerQueueDescriptor& data)
for (unsigned int i=0; i<outputInfo0.GetNumDimensions(); i++)
{
dimensionStride /= outputInfo0.GetShape()[i];
- indices[i] = indexRemainder / dimensionStride; // use integer division to round down
+ indices[i] = indexRemainder / dimensionStride; // Use integer division to round down.
indexRemainder -= indices[i] * dimensionStride;
}
@@ -37,11 +37,11 @@ void Merger(const MergerQueueDescriptor& data)
{
MergerQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
- //split view extents are defined by the size of (the corresponding) input tensor
+ //Split view extents are defined by the size of (the corresponding) input tensor.
const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[viewIdx]);
BOOST_ASSERT(inputInfo.GetNumDimensions() == outputInfo0.GetNumDimensions());
- // check all dimensions to see if this element is inside the given input view
+ // Check all dimensions to see if this element is inside the given input view.
bool insideView = true;
for (unsigned int i=0; i<inputInfo.GetNumDimensions(); i++)
{
@@ -66,13 +66,13 @@ void Merger(const MergerQueueDescriptor& data)
dimensionStride *= inputInfo.GetShape()[i];
}
- //we are within the view, copy input data to the output corresponding to this view
+ //We are within the view, copy input data to the output corresponding to this view.
(GetOutputTensorData<DataType>(0, data))[index] =
(GetInputTensorData<DataType>(viewIdx, data))[inIndex];
- //what should we do if input views overlap on the output tensor?
- //we could error, take the average, or shm else...
- //for now just stop after finding first view (input) that matches.
+ //What should we do if input views overlap on the output tensor?
+ //We could error, take the average, or something else...
+ //For now just stop after finding first view (input) that matches.
break;
}
}
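The index loop above turns a flat output element index into per-dimension indices by successive integer division by the remaining dimension stride. A standalone sketch of the same decomposition, with a hypothetical FlatToIndices helper in place of the Merger internals:

#include <cstddef>
#include <cstdio>
#include <vector>

// Decompose a flat index into per-dimension indices for a row-major shape,
// mirroring the integer-division loop in Merger/Splitter.
std::vector<unsigned int> FlatToIndices(unsigned int flatIndex,
                                        const std::vector<unsigned int>& shape,
                                        unsigned int numElements)
{
    std::vector<unsigned int> indices(shape.size());
    unsigned int remainder = flatIndex;
    unsigned int dimensionStride = numElements;
    for (std::size_t i = 0; i < shape.size(); ++i)
    {
        dimensionStride /= shape[i];
        indices[i] = remainder / dimensionStride;   // integer division rounds down
        remainder -= indices[i] * dimensionStride;
    }
    return indices;
}

int main()
{
    // Flat index 13 in a 2x3x4 tensor is element [1][0][1].
    const auto idx = FlatToIndices(13, {2u, 3u, 4u}, 24u);
    std::printf("[%u][%u][%u]\n", idx[0], idx[1], idx[2]);
    return 0;
}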
diff --git a/src/armnn/backends/RefWorkloads/Pooling2d.cpp b/src/armnn/backends/RefWorkloads/Pooling2d.cpp
index a643e67690..4047f061b3 100644
--- a/src/armnn/backends/RefWorkloads/Pooling2d.cpp
+++ b/src/armnn/backends/RefWorkloads/Pooling2d.cpp
@@ -164,7 +164,7 @@ void Pooling2d(const float* in,
Executor execute = GetExecutor(params.m_PoolType);
// Check supported padding methods outside the loop to simplify
- // the inner loop
+ // the inner loop.
if (params.m_PaddingMethod != PaddingMethod::Exclude &&
params.m_PaddingMethod != PaddingMethod::IgnoreValue)
{
@@ -192,7 +192,7 @@ void Pooling2d(const float* in,
float result = defaultInitializer;
float poolAreaSize = boost::numeric_cast<float>((hend - hstart) * (wend - wstart));
- // special case: when the pooling kernel is over a padding region and the padding
+ // Special case: when the pooling kernel is over a padding region and the padding
// size is larger or equal to the kernel and the kernel only covers
// padding and no real values, then we initialize the result as zero
// by convention. This is because we need to choose a value here and
@@ -208,8 +208,8 @@ void Pooling2d(const float* in,
if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude)
{
- // when we exclude the padding, it means we calculate with a smaller
- // kernel size, so I change the divisor here
+ // When we exclude the padding, it means we calculate with a smaller
+ // kernel size, so the divisor is recalculated here.
poolAreaSize = boost::numeric_cast<float>((hend - hstart) * (wend - wstart));
}
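With PaddingMethod::Exclude, the averaging divisor is recomputed from the clamped window so padded positions do not contribute. A rough sketch of that clamping, with hypothetical names and values (not the actual Pooling2d code):

#include <algorithm>
#include <cstdio>

// Effective averaging area for one pooling window, excluding padding,
// in the spirit of the PaddingMethod::Exclude branch above.
float ExcludePaddingPoolArea(int hstart, int hend, int wstart, int wend,
                             int inputHeight, int inputWidth)
{
    // Clamp the window to the valid input region.
    hstart = std::max(hstart, 0);
    wstart = std::max(wstart, 0);
    hend   = std::min(hend, inputHeight);
    wend   = std::min(wend, inputWidth);
    return static_cast<float>((hend - hstart) * (wend - wstart));
}

int main()
{
    // A 3x3 window hanging one row and one column into the padding covers only 2x2 = 4 real values.
    std::printf("area = %.0f\n", ExcludePaddingPoolArea(-1, 2, -1, 2, 8, 8));
    return 0;
}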
diff --git a/src/armnn/backends/RefWorkloads/Pooling2d.hpp b/src/armnn/backends/RefWorkloads/Pooling2d.hpp
index f88b1a0a4e..cefd022fb3 100644
--- a/src/armnn/backends/RefWorkloads/Pooling2d.hpp
+++ b/src/armnn/backends/RefWorkloads/Pooling2d.hpp
@@ -11,7 +11,7 @@
namespace armnn
{
-/// Computes the Pooling2d operation
+/// Computes the Pooling2d operation.
void Pooling2d(const float* in,
float* out,
const TensorInfo& inputInfo,
diff --git a/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.hpp b/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.hpp
index 0ede46d9fb..9044fca1c2 100644
--- a/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.hpp
+++ b/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.hpp
@@ -13,7 +13,7 @@
namespace armnn
{
-// Base class template providing an implementation of the Constant layer common to all data types
+// Base class template providing an implementation of the Constant layer common to all data types.
template <armnn::DataType DataType>
class RefBaseConstantWorkload : public TypedWorkload<ConstantQueueDescriptor, DataType>
{
diff --git a/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp
index c421b0f212..fbc1f07111 100644
--- a/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp
+++ b/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp
@@ -12,15 +12,22 @@
namespace armnn
{
+RefBatchNormalizationFloat32Workload::RefBatchNormalizationFloat32Workload(
+ const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Float32Workload<BatchNormalizationQueueDescriptor>(descriptor, info),
+ m_Mean(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Mean))),
+ m_Variance(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Variance))),
+ m_Beta(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Beta))),
+ m_Gamma(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Gamma))) {}
void RefBatchNormalizationFloat32Workload::Execute() const
{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationFloat32Workload_Execute");
- const float* var = m_Data.m_Variance->GetConstTensor<float>();
- const float* mean = m_Data.m_Mean->GetConstTensor<float>();
- const float* gamma = m_Data.m_Gamma->GetConstTensor<float>();
- const float* beta = m_Data.m_Beta->GetConstTensor<float>();
+ const float* var = m_Variance->GetConstTensor<float>();
+ const float* mean = m_Mean->GetConstTensor<float>();
+ const float* gamma = m_Gamma->GetConstTensor<float>();
+ const float* beta = m_Beta->GetConstTensor<float>();
auto inputData = GetInputTensorDataFloat(0, m_Data);
auto outputData = GetOutputTensorDataFloat(0, m_Data);
diff --git a/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp
index cbcdadd749..780c329cc6 100644
--- a/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp
+++ b/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp
@@ -14,8 +14,15 @@ namespace armnn
class RefBatchNormalizationFloat32Workload : public Float32Workload<BatchNormalizationQueueDescriptor>
{
public:
- using Float32Workload<BatchNormalizationQueueDescriptor>::Float32Workload;
+ explicit RefBatchNormalizationFloat32Workload(const BatchNormalizationQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
virtual void Execute() const override;
+
+private:
+ std::unique_ptr<ScopedCpuTensorHandle> m_Mean;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Variance;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Beta;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Gamma;
};
} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp
index 8a48523765..4a8e296619 100644
--- a/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp
+++ b/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp
@@ -14,23 +14,30 @@
namespace armnn
{
+RefBatchNormalizationUint8Workload::RefBatchNormalizationUint8Workload(
+ const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Uint8Workload<BatchNormalizationQueueDescriptor>(descriptor, info),
+ m_Mean(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Mean))),
+ m_Variance(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Variance))),
+ m_Beta(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Beta))),
+ m_Gamma(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Gamma))) {}
void RefBatchNormalizationUint8Workload::Execute() const
{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationUint8Workload_Execute");
const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& varInfo = GetTensorInfo(m_Data.m_Variance);
- const TensorInfo& meanInfo = GetTensorInfo(m_Data.m_Mean);
- const TensorInfo& gammaInfo = GetTensorInfo(m_Data.m_Gamma);
- const TensorInfo& betaInfo = GetTensorInfo(m_Data.m_Beta);
+ const TensorInfo& varInfo = GetTensorInfo(m_Variance.get());
+ const TensorInfo& meanInfo = GetTensorInfo(m_Mean.get());
+ const TensorInfo& gammaInfo = GetTensorInfo(m_Gamma.get());
+ const TensorInfo& betaInfo = GetTensorInfo(m_Beta.get());
const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
auto input = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo0);
- auto var = Dequantize(m_Data.m_Variance->GetConstTensor<uint8_t>(), varInfo);
- auto mean = Dequantize(m_Data.m_Mean->GetConstTensor<uint8_t>(), meanInfo);
- auto gamma = Dequantize(m_Data.m_Gamma->GetConstTensor<uint8_t>(), gammaInfo);
- auto beta = Dequantize(m_Data.m_Beta->GetConstTensor<uint8_t>(), betaInfo);
+ auto var = Dequantize(m_Variance->GetConstTensor<uint8_t>(), varInfo);
+ auto mean = Dequantize(m_Mean->GetConstTensor<uint8_t>(), meanInfo);
+ auto gamma = Dequantize(m_Gamma->GetConstTensor<uint8_t>(), gammaInfo);
+ auto beta = Dequantize(m_Beta->GetConstTensor<uint8_t>(), betaInfo);
std::vector<float> results(outputInfo.GetNumElements());
BatchNormImpl(m_Data, var.data(), mean.data(), gamma.data(), beta.data(), results.data(), input.data());
diff --git a/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp
index 57fe995ba5..2c12d28c3f 100644
--- a/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp
+++ b/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp
@@ -14,8 +14,15 @@ namespace armnn
class RefBatchNormalizationUint8Workload : public Uint8Workload<BatchNormalizationQueueDescriptor>
{
public:
- using Uint8Workload<BatchNormalizationQueueDescriptor>::Uint8Workload;
+ explicit RefBatchNormalizationUint8Workload(const BatchNormalizationQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
virtual void Execute() const override;
+
+private:
+ std::unique_ptr<ScopedCpuTensorHandle> m_Mean;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Variance;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Beta;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Gamma;
};
} //namespace armnn
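The batch normalization, convolution and fully connected workloads in this patch all follow the same pattern: constant tensors (mean, variance, beta, gamma, weights, bias) are copied out of the queue descriptor into members owned by the workload at construction time, so Execute() no longer reaches back into the descriptor's handles. A generic sketch of that ownership pattern with stand-in types (MyWorkload, MyQueueDescriptor and ConstTensor are hypothetical; a plain struct stands in for ScopedCpuTensorHandle):

#include <cstdio>
#include <memory>
#include <vector>

// Stand-ins for the Arm NN types; only the ownership pattern is shown.
struct ConstTensor { std::vector<float> m_Data; };
struct MyQueueDescriptor { const ConstTensor* m_Mean = nullptr; };

class MyWorkload
{
public:
    // Copy the descriptor's constant tensor into a member owned by the workload,
    // so Execute() no longer depends on the descriptor keeping it alive.
    explicit MyWorkload(const MyQueueDescriptor& descriptor)
        : m_Mean(std::make_unique<ConstTensor>(*descriptor.m_Mean))
    {}

    void Execute() const
    {
        std::printf("mean[0] = %f\n", m_Mean->m_Data[0]);
    }

private:
    std::unique_ptr<ConstTensor> m_Mean;
};

int main()
{
    ConstTensor mean{{0.5f, 0.25f}};
    MyQueueDescriptor descriptor;
    descriptor.m_Mean = &mean;
    MyWorkload workload(descriptor);
    workload.Execute();
    return 0;
}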
diff --git a/src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.cpp b/src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.cpp
new file mode 100644
index 0000000000..c4b78014b2
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.cpp
@@ -0,0 +1,25 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefConvertFp16ToFp32Workload.hpp"
+#include "Half.hpp"
+#include "RefWorkloadUtils.hpp"
+#include "FloatingPointConverter.hpp"
+
+namespace armnn
+{
+
+void RefConvertFp16ToFp32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp16ToFp32Workload_Execute");
+
+ const Half* const input = GetInputTensorDataHalf(0, m_Data);
+ float* const output = GetOutputTensorDataFloat(0, m_Data);
+
+ unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements();
+ armnnUtils::FloatingPointConverter::ConvertFloat16To32(input, numElements, output);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.hpp b/src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.hpp
new file mode 100644
index 0000000000..34ae35545b
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefConvertFp16ToFp32Workload : public Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>
+{
+public:
+ using Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>::Float16ToFloat32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.cpp b/src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.cpp
new file mode 100644
index 0000000000..3c93297302
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.cpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefConvertFp32ToFp16Workload.hpp"
+
+#include "Half.hpp"
+#include "FloatingPointConverter.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefConvertFp32ToFp16Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp32ToFp16Workload_Execute");
+
+ const float* const input = GetInputTensorDataFloat(0, m_Data);
+ Half* const output = GetOutputTensorDataHalf(0, m_Data);
+
+ // Convert Fp32 input to Fp16 output.
+ unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements();
+ armnnUtils::FloatingPointConverter::ConvertFloat32To16(input, numElements, output);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.hpp b/src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.hpp
new file mode 100644
index 0000000000..903a50449f
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefConvertFp32ToFp16Workload : public Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>
+{
+public:
+ using Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>::Float32ToFloat16Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
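The two new conversion workloads delegate to armnnUtils::FloatingPointConverter. A round-trip usage sketch, assuming only the headers and call forms that appear in the workload sources above (buffer sizes and values are illustrative):

// Round-trip Fp32 -> Fp16 -> Fp32, mirroring the two Execute() methods above.
#include "FloatingPointConverter.hpp"
#include "Half.hpp"

#include <cstdio>
#include <vector>

int main()
{
    const std::vector<float> source = {0.5f, -1.25f, 3.0f};
    std::vector<armnn::Half> half(source.size());
    std::vector<float> roundTrip(source.size());

    // Fp32 -> Fp16, as in RefConvertFp32ToFp16Workload::Execute().
    armnnUtils::FloatingPointConverter::ConvertFloat32To16(source.data(), source.size(), half.data());
    // Fp16 -> Fp32, as in RefConvertFp16ToFp32Workload::Execute().
    armnnUtils::FloatingPointConverter::ConvertFloat16To32(half.data(), roundTrip.size(), roundTrip.data());

    std::printf("%f %f %f\n", roundTrip[0], roundTrip[1], roundTrip[2]);
    return 0;
}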
diff --git a/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp
index 6e4cc69063..4fe823a288 100644
--- a/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp
+++ b/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp
@@ -12,6 +12,12 @@
namespace armnn
{
+RefConvolution2dFloat32Workload::RefConvolution2dFloat32Workload(
+ const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Float32Workload<Convolution2dQueueDescriptor>(descriptor, info),
+ m_Weight(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight))),
+ m_Bias(descriptor.m_Parameters.m_BiasEnabled
+ ? std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias)) : nullptr) {}
void RefConvolution2dFloat32Workload::Execute() const
{
@@ -19,12 +25,13 @@ void RefConvolution2dFloat32Workload::Execute() const
float* outputData = GetOutputTensorDataFloat(0, m_Data);
const float* inputData = GetInputTensorDataFloat(0, m_Data);
- const float* weightData = m_Data.m_Weight->template GetConstTensor<float>();
+ const float* weightData = m_Weight->template GetConstTensor<float>();
const float* biasData = m_Data.m_Parameters.m_BiasEnabled ?
- m_Data.m_Bias->template GetConstTensor<float>() : nullptr;
+ m_Bias->template GetConstTensor<float>() : nullptr;
+ const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
ConvImpl<armnn::Convolution2dQueueDescriptor, float, float, float>(
- m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0);
+ m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo);
}
} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp
index 514369c262..ecf0082f33 100644
--- a/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp
+++ b/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp
@@ -14,8 +14,14 @@ namespace armnn
class RefConvolution2dFloat32Workload : public Float32Workload<Convolution2dQueueDescriptor>
{
public:
- using Float32Workload<Convolution2dQueueDescriptor>::Float32Workload;
+ explicit RefConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
virtual void Execute() const override;
+
+private:
+ std::unique_ptr<ScopedCpuTensorHandle> m_Weight;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Bias;
+
};
} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp
index f390baa387..19e9c2ed0a 100644
--- a/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp
+++ b/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp
@@ -12,6 +12,12 @@
namespace armnn
{
+RefConvolution2dUint8Workload::RefConvolution2dUint8Workload(
+ const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Uint8Workload<Convolution2dQueueDescriptor>(descriptor, info),
+ m_Weight(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight))),
+ m_Bias(descriptor.m_Parameters.m_BiasEnabled
+ ? std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias)) : nullptr) {}
void RefConvolution2dUint8Workload::Execute() const
{
@@ -19,20 +25,21 @@ void RefConvolution2dUint8Workload::Execute() const
const uint8_t* inputData = GetInputTensorDataU8(0, m_Data);
const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const uint8_t* weightsData = m_Data.m_Weight->template GetConstTensor<uint8_t>();
- const TensorInfo& weightsInfo = GetTensorInfo(m_Data.m_Weight);
+ const uint8_t* weightsData = m_Weight->template GetConstTensor<uint8_t>();
+ const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get());
const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ?
- m_Data.m_Bias->template GetConstTensor<int32_t>() :
+ m_Bias->template GetConstTensor<int32_t>() :
nullptr;
uint8_t* outputData = GetOutputTensorDataU8(0, m_Data);
const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
ConvImpl<armnn::Convolution2dQueueDescriptor, uint8_t, int32_t, int32_t>(
m_Data,
inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(),
weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(),
biasData,
- outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset());
+ outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo);
}
} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp
index 954a206463..733d2052b2 100644
--- a/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp
+++ b/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp
@@ -14,8 +14,15 @@ namespace armnn
class RefConvolution2dUint8Workload : public Uint8Workload<Convolution2dQueueDescriptor>
{
public:
- using Uint8Workload<Convolution2dQueueDescriptor>::Uint8Workload;
+ explicit RefConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+
virtual void Execute() const override;
+
+private:
+ std::unique_ptr<ScopedCpuTensorHandle> m_Weight;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Bias;
+
};
} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp
index c631fecb66..f3167e299a 100644
--- a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp
+++ b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp
@@ -12,6 +12,12 @@
namespace armnn
{
+RefDepthwiseConvolution2dFloat32Workload::RefDepthwiseConvolution2dFloat32Workload(
+ const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Float32Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info),
+ m_Weight(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight))),
+ m_Bias(descriptor.m_Parameters.m_BiasEnabled
+ ? std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias)) : nullptr) {}
void RefDepthwiseConvolution2dFloat32Workload::Execute() const
{
@@ -19,12 +25,13 @@ void RefDepthwiseConvolution2dFloat32Workload::Execute() const
float* outputData = GetOutputTensorDataFloat(0, m_Data);
const float* inputData = GetInputTensorDataFloat(0, m_Data);
- const float* weightData = m_Data.m_Weight->template GetConstTensor<float>();
+ const float* weightData = m_Weight->template GetConstTensor<float>();
const float* biasData = m_Data.m_Parameters.m_BiasEnabled ?
- m_Data.m_Bias->template GetConstTensor<float>() : nullptr;
+ m_Bias->template GetConstTensor<float>() : nullptr;
+ const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
ConvImpl<armnn::DepthwiseConvolution2dQueueDescriptor, float, float, float>
- (m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, true);
+ (m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo, true);
}
} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp
index 34e6524684..042e7b3c0a 100644
--- a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp
+++ b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp
@@ -14,8 +14,14 @@ namespace armnn
class RefDepthwiseConvolution2dFloat32Workload : public Float32Workload<DepthwiseConvolution2dQueueDescriptor>
{
public:
- using Float32Workload<DepthwiseConvolution2dQueueDescriptor>::Float32Workload;
+ explicit RefDepthwiseConvolution2dFloat32Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+
virtual void Execute() const override;
+
+private:
+ std::unique_ptr<ScopedCpuTensorHandle> m_Weight;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Bias;
};
} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp
index 5a8fb13112..fd5ade5559 100644
--- a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp
+++ b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp
@@ -13,26 +13,34 @@
namespace armnn
{
+RefDepthwiseConvolution2dUint8Workload::RefDepthwiseConvolution2dUint8Workload(
+ const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Uint8Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info),
+ m_Weight(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight))),
+ m_Bias(descriptor.m_Parameters.m_BiasEnabled
+ ? std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias)) : nullptr) {}
+
void RefDepthwiseConvolution2dUint8Workload::Execute() const
{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dUint8Workload_Execute");
const uint8_t* inputData = GetInputTensorDataU8(0, m_Data);
const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const uint8_t* weightsData = m_Data.m_Weight->template GetConstTensor<uint8_t>();
- const TensorInfo& weightsInfo = GetTensorInfo(m_Data.m_Weight);
+ const uint8_t* weightsData = m_Weight->template GetConstTensor<uint8_t>();
+ const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get());
const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ?
- m_Data.m_Bias->template GetConstTensor<int32_t>() :
+ m_Bias->template GetConstTensor<int32_t>() :
nullptr;
uint8_t* outputData = GetOutputTensorDataU8(0, m_Data);
const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
ConvImpl<armnn::DepthwiseConvolution2dQueueDescriptor, uint8_t, int32_t, int32_t>(
m_Data,
inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(),
weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(),
biasData,
- outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), true);
+ outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo, true);
}
} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp
index bd9945f529..2c8ed2d084 100644
--- a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp
+++ b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp
@@ -14,8 +14,13 @@ namespace armnn
class RefDepthwiseConvolution2dUint8Workload : public Uint8Workload<DepthwiseConvolution2dQueueDescriptor>
{
public:
- using Uint8Workload<DepthwiseConvolution2dQueueDescriptor>::Uint8Workload;
+ explicit RefDepthwiseConvolution2dUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
virtual void Execute() const override;
+
+private:
+ std::unique_ptr<ScopedCpuTensorHandle> m_Weight;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Bias;
};
} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp
index 6fe203e5f0..818455e0e9 100644
--- a/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp
+++ b/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp
@@ -12,6 +12,12 @@
namespace armnn
{
+RefFullyConnectedFloat32Workload::RefFullyConnectedFloat32Workload(
+ const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Float32Workload<FullyConnectedQueueDescriptor>(descriptor, info),
+ m_Weight(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight))),
+ m_Bias(descriptor.m_Parameters.m_BiasEnabled
+ ? std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias)) : nullptr) {}
void RefFullyConnectedFloat32Workload::Execute() const
{
@@ -22,8 +28,8 @@ void RefFullyConnectedFloat32Workload::Execute() const
float* outputData = GetOutputTensorDataFloat(0, m_Data);
const float* inputData = GetInputTensorDataFloat(0, m_Data);
- const float* weightData = m_Data.m_Weight->GetConstTensor<float>();
- const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Data.m_Bias->GetConstTensor<float>() : nullptr;
+ const float* weightData = m_Weight->GetConstTensor<float>();
+ const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->GetConstTensor<float>() : nullptr;
FullyConnected(inputData,
outputData,
diff --git a/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp
index cb835bd2ce..639d935a16 100644
--- a/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp
+++ b/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp
@@ -14,8 +14,13 @@ namespace armnn
class RefFullyConnectedFloat32Workload : public Float32Workload<FullyConnectedQueueDescriptor>
{
public:
- using Float32Workload<FullyConnectedQueueDescriptor>::Float32Workload;
+ explicit RefFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
virtual void Execute() const override;
+
+private:
+ std::unique_ptr<ScopedCpuTensorHandle> m_Weight;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Bias;
};
} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp
index 0186d3f5e5..cd653657e1 100644
--- a/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp
+++ b/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp
@@ -14,6 +14,12 @@
namespace armnn
{
+RefFullyConnectedUint8Workload::RefFullyConnectedUint8Workload(
+ const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Uint8Workload<FullyConnectedQueueDescriptor>(descriptor, info),
+ m_Weight(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight))),
+ m_Bias(descriptor.m_Parameters.m_BiasEnabled
+ ? std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias)) : nullptr) {}
void RefFullyConnectedUint8Workload::Execute() const
{
@@ -22,18 +28,18 @@ void RefFullyConnectedUint8Workload::Execute() const
const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
- const uint8_t* weightData = m_Data.m_Weight->GetConstTensor<uint8_t>();
+ const uint8_t* weightData = m_Weight->GetConstTensor<uint8_t>();
auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo);
- auto weight = Dequantize(weightData, m_Data.m_Weight->GetTensorInfo());
+ auto weight = Dequantize(weightData, m_Weight->GetTensorInfo());
- std::vector<float> results(inputInfo.GetNumElements());
+ std::vector<float> results(outputInfo.GetNumElements());
if (m_Data.m_Parameters.m_BiasEnabled)
{
- const int32_t* biasData = m_Data.m_Bias->GetConstTensor<int32_t>();
- auto bias = Dequantize(biasData, m_Data.m_Bias->GetTensorInfo());
+ const int32_t* biasData = m_Bias->GetConstTensor<int32_t>();
+ auto bias = Dequantize(biasData, m_Bias->GetTensorInfo());
FullyConnected(dequant.data(),
results.data(),
diff --git a/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp
index cd14ea85e0..36e5f631ad 100644
--- a/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp
+++ b/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp
@@ -14,8 +14,13 @@ namespace armnn
class RefFullyConnectedUint8Workload : public Uint8Workload<FullyConnectedQueueDescriptor>
{
public:
- using Uint8Workload<FullyConnectedQueueDescriptor>::Uint8Workload;
+ explicit RefFullyConnectedUint8Workload(const FullyConnectedQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
virtual void Execute() const override;
+
+private:
+ std::unique_ptr<ScopedCpuTensorHandle> m_Weight;
+ std::unique_ptr<ScopedCpuTensorHandle> m_Bias;
};
} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.cpp
new file mode 100644
index 0000000000..bc33638310
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.cpp
@@ -0,0 +1,16 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefLstmFloat32Workload.hpp"
+
+namespace armnn
+{
+
+void RefLstmFloat32Workload::Execute() const
+{
+ throw armnn::Exception("No implementation of Lstm in the Ref backend!");
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.hpp
new file mode 100644
index 0000000000..0acce4d309
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefLstmFloat32Workload : public Float32Workload<LstmQueueDescriptor>
+{
+public:
+ using Float32Workload<LstmQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp
index c743207423..f4dff60ae4 100644
--- a/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp
+++ b/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp
@@ -17,7 +17,7 @@
namespace armnn
{
-// Helper function to compute "Within" normalization using Krichevsky 2012: Local Brightness Normalization
+// Helper function to compute "Within" normalization using Krichevsky 2012: Local Brightness Normalization.
static void NormalizeWithinUingLbr(const float* inputData,
float* outputData,
const TensorShape& tensorShape,
@@ -80,7 +80,7 @@ static void NormalizeWithinUingLbr(const float* inputData,
}
}
-// Helper function to compute "Across" normalization using Krichevsky 2012: Local Brightness Normalization
+// Helper function to compute "Across" normalization using Krichevsky 2012: Local Brightness Normalization.
void NormalizeAcrossUingLbr(const float* inputData,
float* outputData,
const TensorShape& tensorShape,
diff --git a/src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp b/src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp
index b2bb8fbf3d..93c883d826 100644
--- a/src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp
+++ b/src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp
@@ -7,6 +7,7 @@
#include "RefWorkloadUtils.hpp"
#include <Permute.hpp>
+#include "TypeUtils.hpp"
namespace armnn
{
diff --git a/src/armnn/backends/RefWorkloads/RefWorkloadUtils.hpp b/src/armnn/backends/RefWorkloads/RefWorkloadUtils.hpp
index 088fe819e5..1df735ea55 100644
--- a/src/armnn/backends/RefWorkloads/RefWorkloadUtils.hpp
+++ b/src/armnn/backends/RefWorkloads/RefWorkloadUtils.hpp
@@ -9,6 +9,7 @@
#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>
+#include <Half.hpp>
#include <boost/polymorphic_cast.hpp>
@@ -70,6 +71,18 @@ float* GetOutputTensorDataFloat(unsigned int idx, const PayloadType& data)
return GetOutputTensorData<float>(idx, data);
}
+template <typename PayloadType>
+const Half* GetInputTensorDataHalf(unsigned int idx, const PayloadType& data)
+{
+ return GetInputTensorData<Half>(idx, data);
+}
+
+template <typename PayloadType>
+Half* GetOutputTensorDataHalf(unsigned int idx, const PayloadType& data)
+{
+ return GetOutputTensorData<Half>(idx, data);
+}
+
////////////////////////////////////////////
/// u8 helpers
////////////////////////////////////////////
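The new Half accessors are thin typed wrappers over the existing GetInputTensorData<T>/GetOutputTensorData<T> templates. A self-contained sketch of that wrapper pattern, with a stand-in payload type and uint16_t standing in for armnn::Half (all names here are illustrative, not the real RefWorkloadUtils code):

#include <cstdint>
#include <cstdio>
#include <vector>

// Stand-in payload: in Arm NN this would be the queue descriptor with tensor handle inputs/outputs.
struct FakePayload
{
    std::vector<void*> m_Inputs;
    std::vector<void*> m_Outputs;
};

// Generic typed accessors, mirroring GetInputTensorData<T>/GetOutputTensorData<T>.
template <typename DataType, typename PayloadType>
const DataType* GetInputTensorData(unsigned int idx, const PayloadType& data)
{
    return static_cast<const DataType*>(data.m_Inputs[idx]);
}

// uint16_t stands in for armnn::Half here.
using Half = std::uint16_t;

// The new Half helper is just the generic accessor instantiated for the 16-bit type.
template <typename PayloadType>
const Half* GetInputTensorDataHalf(unsigned int idx, const PayloadType& data)
{
    return GetInputTensorData<Half>(idx, data);
}

int main()
{
    std::vector<Half> buffer = {0x3C00 /* 1.0 in IEEE fp16 */};
    FakePayload payload;
    payload.m_Inputs.push_back(buffer.data());
    std::printf("raw fp16 bits: 0x%04X\n",
                static_cast<unsigned int>(*GetInputTensorDataHalf(0, payload)));
    return 0;
}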
diff --git a/src/armnn/backends/RefWorkloads/ResizeBilinear.cpp b/src/armnn/backends/RefWorkloads/ResizeBilinear.cpp
index 7b386ed467..d8bca4be44 100644
--- a/src/armnn/backends/RefWorkloads/ResizeBilinear.cpp
+++ b/src/armnn/backends/RefWorkloads/ResizeBilinear.cpp
@@ -27,7 +27,7 @@ inline float Lerp(float a, float b, float w)
void ResizeBilinear(const float* in, const TensorInfo& inputInfo, float* out, const TensorInfo& outputInfo)
{
- // We follow the definition of TensorFlow and AndroidNN: The top-left corner of a texel in the output
+ // We follow the definition of TensorFlow and AndroidNN: the top-left corner of a texel in the output
// image is projected into the input image to figure out the interpolants and weights. Note that this
// will yield different results than if projecting the centre of output texels.
@@ -39,8 +39,8 @@ void ResizeBilinear(const float* in, const TensorInfo& inputInfo, float* out, co
const unsigned int outputHeight = outputInfo.GetShape()[2];
const unsigned int outputWidth = outputInfo.GetShape()[3];
- // How much to scale pixel coordinates in the output image to get the corresponding pixel coordinates
- // in the input image
+ // How much to scale pixel coordinates in the output image, to get the corresponding pixel coordinates
+ // in the input image.
const float scaleY = boost::numeric_cast<float>(inputHeight) / boost::numeric_cast<float>(outputHeight);
const float scaleX = boost::numeric_cast<float>(inputWidth) / boost::numeric_cast<float>(outputWidth);
@@ -53,33 +53,33 @@ void ResizeBilinear(const float* in, const TensorInfo& inputInfo, float* out, co
{
for (unsigned int y = 0; y < outputHeight; ++y)
{
- // Corresponding real-valued height coordinate in input image
+ // Corresponding real-valued height coordinate in input image.
const float iy = boost::numeric_cast<float>(y) * scaleY;
- // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation)
+ // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation).
const float fiy = floorf(iy);
const unsigned int y0 = boost::numeric_cast<unsigned int>(fiy);
- // Interpolation weight (range [0,1])
+ // Interpolation weight (range [0,1]).
const float yw = iy - fiy;
for (unsigned int x = 0; x < outputWidth; ++x)
{
- // Real-valued and discrete width coordinates in input image
+ // Real-valued and discrete width coordinates in input image.
const float ix = boost::numeric_cast<float>(x) * scaleX;
const float fix = floorf(ix);
const unsigned int x0 = boost::numeric_cast<unsigned int>(fix);
- // Interpolation weight (range [0,1])
+ // Interpolation weight (range [0,1]).
const float xw = ix - fix;
- // Discrete width/height coordinates of texels below and to the right of (x0, y0)
+ // Discrete width/height coordinates of texels below and to the right of (x0, y0).
const unsigned int x1 = std::min(x0 + 1, inputWidth - 1u);
const unsigned int y1 = std::min(y0 + 1, inputHeight - 1u);
// Interpolation
- const float ly0 = Lerp(input.Get(n, c, y0, x0), input.Get(n, c, y0, x1), xw); // lerp along row y0
- const float ly1 = Lerp(input.Get(n, c, y1, x0), input.Get(n, c, y1, x1), xw); // lerp along row y1
+ const float ly0 = Lerp(input.Get(n, c, y0, x0), input.Get(n, c, y0, x1), xw); // lerp along row y0.
+ const float ly1 = Lerp(input.Get(n, c, y1, x0), input.Get(n, c, y1, x1), xw); // lerp along row y1.
const float l = Lerp(ly0, ly1, yw);
output.Get(n, c, y, x) = l;
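As the comments describe, each output texel's top-left corner is projected back into the input and the surrounding 2x2 texels are blended with two horizontal lerps and one vertical lerp. A scalar sketch of that arithmetic for a single output pixel (hard-coded 2x2 input, not the Arm NN implementation):

#include <algorithm>
#include <cmath>
#include <cstdio>

inline float Lerp(float a, float b, float w)
{
    return w * b + (1.0f - w) * a;
}

int main()
{
    // 2x2 input image, resized to 3x3; sample the centre output pixel (x = 1, y = 1).
    const float input[2][2] = {{0.0f, 1.0f}, {2.0f, 3.0f}};
    const unsigned int inputHeight = 2, inputWidth = 2;
    const unsigned int outputHeight = 3, outputWidth = 3;
    const float scaleY = float(inputHeight) / float(outputHeight);
    const float scaleX = float(inputWidth) / float(outputWidth);

    const unsigned int x = 1, y = 1;
    const float iy = y * scaleY, ix = x * scaleX;                 // projected input coordinates
    const unsigned int y0 = unsigned(std::floor(iy));
    const unsigned int x0 = unsigned(std::floor(ix));
    const float yw = iy - std::floor(iy), xw = ix - std::floor(ix);
    const unsigned int y1 = std::min(y0 + 1, inputHeight - 1u);
    const unsigned int x1 = std::min(x0 + 1, inputWidth - 1u);

    const float ly0 = Lerp(input[y0][x0], input[y0][x1], xw);     // lerp along row y0
    const float ly1 = Lerp(input[y1][x0], input[y1][x1], xw);     // lerp along row y1
    std::printf("output(1,1) = %f\n", Lerp(ly0, ly1, yw));        // lerp between the rows
    return 0;
}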
diff --git a/src/armnn/backends/RefWorkloads/Softmax.cpp b/src/armnn/backends/RefWorkloads/Softmax.cpp
index 58840e3076..c9f0bc5e59 100644
--- a/src/armnn/backends/RefWorkloads/Softmax.cpp
+++ b/src/armnn/backends/RefWorkloads/Softmax.cpp
@@ -11,13 +11,13 @@
namespace armnn
{
-/// Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo
+/// Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo.
void Softmax(const float* in, float* out, const TensorInfo& tensorInfo, float beta)
{
unsigned int numChannels = tensorInfo.GetShape()[1];
for (unsigned int n = 0; n < tensorInfo.GetShape()[0]; n++)
{
- // find maximum channel
+ // Find maximum channel.
float max = in[n * numChannels];
for (unsigned int c = 1; c < numChannels; c++)
{
@@ -28,7 +28,7 @@ void Softmax(const float* in, float* out, const TensorInfo& tensorInfo, float be
}
}
- // exponentiate all values and sum
+ // Exponentiate all values and sum.
std::vector<float> exponentials(numChannels);
float sum = 0.0f;
for (unsigned int c = 0; c < numChannels; c++)
@@ -38,7 +38,7 @@ void Softmax(const float* in, float* out, const TensorInfo& tensorInfo, float be
sum += exponentials[c];
}
- // divide exponentials by sum to give outputs
+ // Divide exponentials by sum to give outputs.
for (unsigned int c = 0; c < numChannels; c++)
{
out[n * numChannels + c] = exponentials[c] / sum;
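The softmax loop subtracts the per-sample channel maximum before exponentiating (for numerical stability) and then normalizes by the sum of exponentials. A minimal single-vector sketch of those three steps (standalone, not the Arm NN code):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

// Softmax over one channel vector: max-subtract, exponentiate, normalise.
std::vector<float> Softmax(const std::vector<float>& in, float beta)
{
    const float max = *std::max_element(in.begin(), in.end());   // find maximum channel
    std::vector<float> out(in.size());
    float sum = 0.0f;
    for (std::size_t c = 0; c < in.size(); ++c)
    {
        out[c] = std::exp((in[c] - max) * beta);                 // exponentiate shifted values
        sum += out[c];
    }
    for (float& v : out)
    {
        v /= sum;                                                // divide by the sum
    }
    return out;
}

int main()
{
    const auto result = Softmax({1.0f, 2.0f, 3.0f}, 1.0f);
    std::printf("%f %f %f\n", result[0], result[1], result[2]);  // ~0.09 0.24 0.67
    return 0;
}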
diff --git a/src/armnn/backends/RefWorkloads/Softmax.hpp b/src/armnn/backends/RefWorkloads/Softmax.hpp
index c508ab2b82..f75388dc2b 100644
--- a/src/armnn/backends/RefWorkloads/Softmax.hpp
+++ b/src/armnn/backends/RefWorkloads/Softmax.hpp
@@ -10,7 +10,7 @@
namespace armnn
{
-/// Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo
+/// Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo.
void Softmax(const float* in, float* out, const TensorInfo& tensorInfo, float beta);
} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/Splitter.hpp b/src/armnn/backends/RefWorkloads/Splitter.hpp
index bd5da6cfe2..c12d9368bf 100644
--- a/src/armnn/backends/RefWorkloads/Splitter.hpp
+++ b/src/armnn/backends/RefWorkloads/Splitter.hpp
@@ -31,7 +31,7 @@ void Splitter(const SplitterQueueDescriptor& data)
for (unsigned int i = 0; i<inputInfo0.GetNumDimensions(); i++)
{
dimensionStride /= inputInfo0.GetShape()[i];
- indices[i] = indexRemainder / dimensionStride; // use integer division to round down
+ indices[i] = indexRemainder / dimensionStride; // Use integer division to round down.
indexRemainder -= indices[i] * dimensionStride;
}
@@ -39,11 +39,11 @@ void Splitter(const SplitterQueueDescriptor& data)
{
SplitterQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
- //split view extents are defined by the size of (the corresponding) input tensor
+ //Split view extents are defined by the size of (the corresponding) input tensor.
const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[viewIdx]);
BOOST_ASSERT(outputInfo.GetNumDimensions() == inputInfo0.GetNumDimensions());
- // check all dimensions to see if this element is inside the given input view
+ // Check all dimensions to see if this element is inside the given input view.
bool insideView = true;
for (unsigned int i = 0; i<outputInfo.GetNumDimensions(); i++)
{
@@ -68,7 +68,7 @@ void Splitter(const SplitterQueueDescriptor& data)
dimensionStride *= outputInfo.GetShape()[i];
}
- //we are within the view, copy input data to the output corresponding to this view
+ //We are within the view; copy input data to the output corresponding to this view.
DataType* outputData = GetOutputTensorData<DataType>(viewIdx, data);
BOOST_ASSERT(outputData);
diff --git a/src/armnn/backends/RefWorkloads/TensorBufferArrayView.hpp b/src/armnn/backends/RefWorkloads/TensorBufferArrayView.hpp
index 3994c1f1de..ad0f38e867 100644
--- a/src/armnn/backends/RefWorkloads/TensorBufferArrayView.hpp
+++ b/src/armnn/backends/RefWorkloads/TensorBufferArrayView.hpp
@@ -10,7 +10,7 @@
namespace armnn
{
-// Utility class providing access to raw tensor memory based on indices along each dimension
+// Utility class providing access to raw tensor memory based on indices along each dimension.
template <typename DataType>
class TensorBufferArrayView
{