From c577f2c6a3b4ddb6ba87a882723c53a248afbeba Mon Sep 17 00:00:00 2001 From: telsoa01 Date: Fri, 31 Aug 2018 09:22:23 +0100 Subject: Release 18.08 --- src/armnn/backends/RefWorkloads/Activation.cpp | 2 +- src/armnn/backends/RefWorkloads/Activation.hpp | 2 +- src/armnn/backends/RefWorkloads/Broadcast.hpp | 2 +- src/armnn/backends/RefWorkloads/ConvImpl.cpp | 2 +- src/armnn/backends/RefWorkloads/ConvImpl.hpp | 26 +++++++++---------- src/armnn/backends/RefWorkloads/FullyConnected.cpp | 6 ++--- src/armnn/backends/RefWorkloads/FullyConnected.hpp | 2 +- src/armnn/backends/RefWorkloads/Merger.hpp | 14 +++++------ src/armnn/backends/RefWorkloads/Pooling2d.cpp | 8 +++--- src/armnn/backends/RefWorkloads/Pooling2d.hpp | 2 +- .../RefWorkloads/RefBaseConstantWorkload.hpp | 2 +- .../RefBatchNormalizationFloat32Workload.cpp | 15 ++++++++--- .../RefBatchNormalizationFloat32Workload.hpp | 9 ++++++- .../RefBatchNormalizationUint8Workload.cpp | 23 +++++++++++------ .../RefBatchNormalizationUint8Workload.hpp | 9 ++++++- .../RefWorkloads/RefConvertFp16ToFp32Workload.cpp | 25 +++++++++++++++++++ .../RefWorkloads/RefConvertFp16ToFp32Workload.hpp | 21 ++++++++++++++++ .../RefWorkloads/RefConvertFp32ToFp16Workload.cpp | 29 ++++++++++++++++++++++ .../RefWorkloads/RefConvertFp32ToFp16Workload.hpp | 21 ++++++++++++++++ .../RefConvolution2dFloat32Workload.cpp | 13 +++++++--- .../RefConvolution2dFloat32Workload.hpp | 8 +++++- .../RefWorkloads/RefConvolution2dUint8Workload.cpp | 15 ++++++++--- .../RefWorkloads/RefConvolution2dUint8Workload.hpp | 9 ++++++- .../RefDepthwiseConvolution2dFloat32Workload.cpp | 13 +++++++--- .../RefDepthwiseConvolution2dFloat32Workload.hpp | 8 +++++- .../RefDepthwiseConvolution2dUint8Workload.cpp | 16 +++++++++--- .../RefDepthwiseConvolution2dUint8Workload.hpp | 7 +++++- .../RefFullyConnectedFloat32Workload.cpp | 10 ++++++-- .../RefFullyConnectedFloat32Workload.hpp | 7 +++++- .../RefFullyConnectedUint8Workload.cpp | 16 ++++++++---- .../RefFullyConnectedUint8Workload.hpp | 7 +++++- .../RefWorkloads/RefLstmFloat32Workload.cpp | 16 ++++++++++++ .../RefWorkloads/RefLstmFloat32Workload.hpp | 21 ++++++++++++++++ .../RefNormalizationFloat32Workload.cpp | 4 +-- .../backends/RefWorkloads/RefPermuteWorkload.cpp | 1 + .../backends/RefWorkloads/RefWorkloadUtils.hpp | 13 ++++++++++ src/armnn/backends/RefWorkloads/ResizeBilinear.cpp | 22 ++++++++-------- src/armnn/backends/RefWorkloads/Softmax.cpp | 8 +++--- src/armnn/backends/RefWorkloads/Softmax.hpp | 2 +- src/armnn/backends/RefWorkloads/Splitter.hpp | 8 +++--- .../RefWorkloads/TensorBufferArrayView.hpp | 2 +- 41 files changed, 348 insertions(+), 98 deletions(-) create mode 100644 src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.cpp create mode 100644 src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.hpp create mode 100644 src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.cpp create mode 100644 src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.hpp create mode 100644 src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.cpp create mode 100644 src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.hpp (limited to 'src/armnn/backends/RefWorkloads') diff --git a/src/armnn/backends/RefWorkloads/Activation.cpp b/src/armnn/backends/RefWorkloads/Activation.cpp index ede283cbf9..fdb6091ad7 100644 --- a/src/armnn/backends/RefWorkloads/Activation.cpp +++ b/src/armnn/backends/RefWorkloads/Activation.cpp @@ -24,7 +24,7 @@ void Activation(const float* in, float input = in[i]; float output; - // compute the result of the 
activation function + // Compute the result of the activation function. switch (function) { case ActivationFunction::Linear: diff --git a/src/armnn/backends/RefWorkloads/Activation.hpp b/src/armnn/backends/RefWorkloads/Activation.hpp index 874441c862..4ee604b462 100644 --- a/src/armnn/backends/RefWorkloads/Activation.hpp +++ b/src/armnn/backends/RefWorkloads/Activation.hpp @@ -9,7 +9,7 @@ namespace armnn { -/// Performs the ActivationFunction elementwise on the inputs to give the outputs +/// Performs the ActivationFunction elementwise on the inputs to give the outputs. void Activation(const float* in, float* out, const TensorInfo& tensorInfo, diff --git a/src/armnn/backends/RefWorkloads/Broadcast.hpp b/src/armnn/backends/RefWorkloads/Broadcast.hpp index b65b57f7a1..bdf03f2a16 100644 --- a/src/armnn/backends/RefWorkloads/Broadcast.hpp +++ b/src/armnn/backends/RefWorkloads/Broadcast.hpp @@ -43,7 +43,7 @@ struct BroadcastLoop } private: - // Struct to hold the dimension data + // Struct to hold the dimension data. struct BroadcastDimensionData { unsigned int m_DimSize; diff --git a/src/armnn/backends/RefWorkloads/ConvImpl.cpp b/src/armnn/backends/RefWorkloads/ConvImpl.cpp index 9ebadacddb..3dcd344101 100644 --- a/src/armnn/backends/RefWorkloads/ConvImpl.cpp +++ b/src/armnn/backends/RefWorkloads/ConvImpl.cpp @@ -46,7 +46,7 @@ int32_t QuantizedMultiplierSmallerThanOne::operator*(int32_t rhs) const int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b) { - // Check for overflow + // Check for overflow. if (a == b && a == std::numeric_limits::min()) { return std::numeric_limits::max(); diff --git a/src/armnn/backends/RefWorkloads/ConvImpl.hpp b/src/armnn/backends/RefWorkloads/ConvImpl.hpp index 8b66b0b7d2..b7d5d17a8d 100644 --- a/src/armnn/backends/RefWorkloads/ConvImpl.hpp +++ b/src/armnn/backends/RefWorkloads/ConvImpl.hpp @@ -18,7 +18,7 @@ namespace armnn { -/// Performs multiplication of a integer with a multiplier which is less than one, +/// Performs multiplication of an integer with a multiplier which is less than one, /// using quantized integer arithmetic which is consistent with AndroidNN's CPU executor. struct QuantizedMultiplierSmallerThanOne { @@ -28,21 +28,21 @@ public: /// The implementation of this function is adapted from Android NN's QuantizeMultiplierSmallerThanOne(). QuantizedMultiplierSmallerThanOne(float multiplier); - /// The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne() + /// The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne(). int32_t operator*(int32_t rhs) const; private: - /// The implementation of this function is adapted from gemmlowp's SaturatingRoundingDoublingHighMul() + /// The implementation of this function is adapted from gemmlowp's SaturatingRoundingDoublingHighMul(). static int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b); - /// The implementation of this function is adapted from gemmlowp's RoundingDivideByPOT() + /// The implementation of this function is adapted from gemmlowp's RoundingDivideByPOT(). static int32_t RoundingDivideByPOT(int32_t x, int exponent); int32_t m_Multiplier; int32_t m_RightShift; }; -/// an implementation shared by normal and depthwise convolution +/// An implementation shared by normal and depthwise convolution. 
template static void ConvImpl(ConvData data, const InputType* inputData, @@ -55,6 +55,7 @@ static void ConvImpl(ConvData data, InputType* outputData, float outputScale, int32_t outputOffset, + const TensorInfo& filterInfo, bool depthwise = false) { if (data.m_Parameters.m_BiasEnabled && !biasData) @@ -64,7 +65,6 @@ static void ConvImpl(ConvData data, const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]); const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]); - const TensorInfo& filterInfo = data.m_Weight->GetTensorInfo(); unsigned int depthMult = depthwise ? filterInfo.GetShape()[0] : 1; unsigned int channelsInput = filterInfo.GetShape()[1]; @@ -84,7 +84,7 @@ static void ConvImpl(ConvData data, unsigned int hStride = data.m_Parameters.m_StrideY; unsigned int xStride = data.m_Parameters.m_StrideX; - // the world's least efficient convolution + // The world's least efficient convolution. for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++) { for (unsigned int cOutput = 0; cOutput < channelsOutput; cOutput++) @@ -93,11 +93,11 @@ static void ConvImpl(ConvData data, { for (unsigned int xOutput = 0; xOutput < widthOutput; xOutput++) { - // this loop goes over each output element + // This loop goes over each output element. AccumulatorType sum = AccumulatorType(); - // for depthwise, each output channel corresponds to exactly one input channel - // for normal, must loop over each input channel + // For depthwise, each output channel corresponds to exactly one input channel. + // For normal, must loop over each input channel. for (unsigned int cInput = 0; cInput < (depthwise ? 1 : channelsInput); cInput++) { unsigned int depthwiseMultiplierIdx = 0; @@ -111,11 +111,11 @@ static void ConvImpl(ConvData data, { for (unsigned int xFilter = 0; xFilter < widthFilter; xFilter++) { - // this loop goes over each input element for each output element + // This loop goes over each input element for each output element. unsigned int filterIndex; - // since dimensionality of kernel depends on depthwiseness, so does index + // Since dimensionality of kernel depends on depthwiseness, so does index. if (depthwise) { filterIndex = depthwiseMultiplierIdx * widthFilter * heightFilter * channelsInput + @@ -138,7 +138,7 @@ static void ConvImpl(ConvData data, AccumulatorType inputValue; - // check if we're in the padding + // Check if we're in the padding. if (yInput < paddingTop || yInput >= heightInput + paddingTop || xInput < paddingLeft || xInput >= widthInput + paddingLeft ) { diff --git a/src/armnn/backends/RefWorkloads/FullyConnected.cpp b/src/armnn/backends/RefWorkloads/FullyConnected.cpp index 8ba11d19c6..1a8263b9a1 100644 --- a/src/armnn/backends/RefWorkloads/FullyConnected.cpp +++ b/src/armnn/backends/RefWorkloads/FullyConnected.cpp @@ -18,11 +18,11 @@ void FullyConnected(const float* inputData, const float* biasData, bool transposeWeights) { - unsigned int N = outputTensorInfo.GetShape()[1]; // Output Vector Size + unsigned int N = outputTensorInfo.GetShape()[1]; // Outputs Vector Size. - BOOST_ASSERT(inputTensorInfo.GetNumDimensions() > 1); // Need some data + BOOST_ASSERT(inputTensorInfo.GetNumDimensions() > 1); // Needs some data. - unsigned int K = 1; // Total number of activations in the input + unsigned int K = 1; // Total number of activations in the input. 
for (unsigned int i = 1; i < inputTensorInfo.GetNumDimensions(); i++) { K *= inputTensorInfo.GetShape()[i]; diff --git a/src/armnn/backends/RefWorkloads/FullyConnected.hpp b/src/armnn/backends/RefWorkloads/FullyConnected.hpp index 9fa2456110..fa6f54a3ec 100644 --- a/src/armnn/backends/RefWorkloads/FullyConnected.hpp +++ b/src/armnn/backends/RefWorkloads/FullyConnected.hpp @@ -10,7 +10,7 @@ namespace armnn { -/// Performs a matrix multiplication and optionally adds a bias +/// Performs a matrix multiplication and optionally adds a bias. void FullyConnected(const float* inputData, float* outputData, const TensorInfo& inputTensorInfo, diff --git a/src/armnn/backends/RefWorkloads/Merger.hpp b/src/armnn/backends/RefWorkloads/Merger.hpp index 7d1bfab557..1294d05e08 100644 --- a/src/armnn/backends/RefWorkloads/Merger.hpp +++ b/src/armnn/backends/RefWorkloads/Merger.hpp @@ -29,7 +29,7 @@ void Merger(const MergerQueueDescriptor& data) for (unsigned int i=0; i(0, data))[index] = (GetInputTensorData(viewIdx, data))[inIndex]; - //what should we do if input views overlap on the output tensor? - //we could error, take the average, or shm else... - //for now just stop after finding first view (input) that matches. + //What should we do if input views overlap on the output tensor? + //We could error, take the average, or shm else... + //For now just stop after finding first view (input) that matches. break; } } diff --git a/src/armnn/backends/RefWorkloads/Pooling2d.cpp b/src/armnn/backends/RefWorkloads/Pooling2d.cpp index a643e67690..4047f061b3 100644 --- a/src/armnn/backends/RefWorkloads/Pooling2d.cpp +++ b/src/armnn/backends/RefWorkloads/Pooling2d.cpp @@ -164,7 +164,7 @@ void Pooling2d(const float* in, Executor execute = GetExecutor(params.m_PoolType); // Check supported padding methods outside the loop to simplify - // the inner loop + // the inner loop. if (params.m_PaddingMethod != PaddingMethod::Exclude && params.m_PaddingMethod != PaddingMethod::IgnoreValue) { @@ -192,7 +192,7 @@ void Pooling2d(const float* in, float result = defaultInitializer; float poolAreaSize = boost::numeric_cast((hend - hstart) * (wend - wstart)); - // special case: when the pooling kernel is over a padding region and the padding + // Special case: when the pooling kernel is over a padding region and the padding // size is larger or equal to the kernel and the kernel only covers // padding and no real values, then we initialize the result as zero // by convention. This is because we need to choose a value here and @@ -208,8 +208,8 @@ void Pooling2d(const float* in, if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude) { - // when we exclude the padding, it means we calculate with a smaller - // kernel size, so I change the divisor here + // When we exclude the padding, it means we calculate with a smaller + // kernel size, so I changed the divisor here. poolAreaSize = boost::numeric_cast((hend - hstart) * (wend - wstart)); } diff --git a/src/armnn/backends/RefWorkloads/Pooling2d.hpp b/src/armnn/backends/RefWorkloads/Pooling2d.hpp index f88b1a0a4e..cefd022fb3 100644 --- a/src/armnn/backends/RefWorkloads/Pooling2d.hpp +++ b/src/armnn/backends/RefWorkloads/Pooling2d.hpp @@ -11,7 +11,7 @@ namespace armnn { -/// Computes the Pooling2d operation +/// Computes the Pooling2d operation. 
void Pooling2d(const float* in, float* out, const TensorInfo& inputInfo, diff --git a/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.hpp b/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.hpp index 0ede46d9fb..9044fca1c2 100644 --- a/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.hpp +++ b/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.hpp @@ -13,7 +13,7 @@ namespace armnn { -// Base class template providing an implementation of the Constant layer common to all data types +// Base class template providing an implementation of the Constant layer common to all data types. template class RefBaseConstantWorkload : public TypedWorkload { diff --git a/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp index c421b0f212..fbc1f07111 100644 --- a/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp +++ b/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp @@ -12,15 +12,22 @@ namespace armnn { +RefBatchNormalizationFloat32Workload::RefBatchNormalizationFloat32Workload( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) + : Float32Workload(descriptor, info), + m_Mean(std::make_unique(*(descriptor.m_Mean))), + m_Variance(std::make_unique(*(descriptor.m_Variance))), + m_Beta(std::make_unique(*(descriptor.m_Beta))), + m_Gamma(std::make_unique(*(descriptor.m_Gamma))) {} void RefBatchNormalizationFloat32Workload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationFloat32Workload_Execute"); - const float* var = m_Data.m_Variance->GetConstTensor(); - const float* mean = m_Data.m_Mean->GetConstTensor(); - const float* gamma = m_Data.m_Gamma->GetConstTensor(); - const float* beta = m_Data.m_Beta->GetConstTensor(); + const float* var = m_Variance->GetConstTensor(); + const float* mean = m_Mean->GetConstTensor(); + const float* gamma = m_Gamma->GetConstTensor(); + const float* beta = m_Beta->GetConstTensor(); auto inputData = GetInputTensorDataFloat(0, m_Data); auto outputData = GetOutputTensorDataFloat(0, m_Data); diff --git a/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp index cbcdadd749..780c329cc6 100644 --- a/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp +++ b/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp @@ -14,8 +14,15 @@ namespace armnn class RefBatchNormalizationFloat32Workload : public Float32Workload { public: - using Float32Workload::Float32Workload; + explicit RefBatchNormalizationFloat32Workload(const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info); virtual void Execute() const override; + +private: + std::unique_ptr m_Mean; + std::unique_ptr m_Variance; + std::unique_ptr m_Beta; + std::unique_ptr m_Gamma; }; } //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp index 8a48523765..4a8e296619 100644 --- a/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp +++ b/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp @@ -14,23 +14,30 @@ namespace armnn { +RefBatchNormalizationUint8Workload::RefBatchNormalizationUint8Workload( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) + : Uint8Workload(descriptor, 
info), + m_Mean(std::make_unique(*(descriptor.m_Mean))), + m_Variance(std::make_unique(*(descriptor.m_Variance))), + m_Beta(std::make_unique(*(descriptor.m_Beta))), + m_Gamma(std::make_unique(*(descriptor.m_Gamma))) {} void RefBatchNormalizationUint8Workload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationUint8Workload_Execute"); const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& varInfo = GetTensorInfo(m_Data.m_Variance); - const TensorInfo& meanInfo = GetTensorInfo(m_Data.m_Mean); - const TensorInfo& gammaInfo = GetTensorInfo(m_Data.m_Gamma); - const TensorInfo& betaInfo = GetTensorInfo(m_Data.m_Beta); + const TensorInfo& varInfo = GetTensorInfo(m_Variance.get()); + const TensorInfo& meanInfo = GetTensorInfo(m_Mean.get()); + const TensorInfo& gammaInfo = GetTensorInfo(m_Gamma.get()); + const TensorInfo& betaInfo = GetTensorInfo(m_Beta.get()); const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); auto input = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo0); - auto var = Dequantize(m_Data.m_Variance->GetConstTensor(), varInfo); - auto mean = Dequantize(m_Data.m_Mean->GetConstTensor(), meanInfo); - auto gamma = Dequantize(m_Data.m_Gamma->GetConstTensor(), gammaInfo); - auto beta = Dequantize(m_Data.m_Beta->GetConstTensor(), betaInfo); + auto var = Dequantize(m_Variance->GetConstTensor(), varInfo); + auto mean = Dequantize(m_Mean->GetConstTensor(), meanInfo); + auto gamma = Dequantize(m_Gamma->GetConstTensor(), gammaInfo); + auto beta = Dequantize(m_Beta->GetConstTensor(), betaInfo); std::vector results(outputInfo.GetNumElements()); BatchNormImpl(m_Data, var.data(), mean.data(), gamma.data(), beta.data(), results.data(), input.data()); diff --git a/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp index 57fe995ba5..2c12d28c3f 100644 --- a/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp +++ b/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp @@ -14,8 +14,15 @@ namespace armnn class RefBatchNormalizationUint8Workload : public Uint8Workload { public: - using Uint8Workload::Uint8Workload; + explicit RefBatchNormalizationUint8Workload(const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info); virtual void Execute() const override; + +private: + std::unique_ptr m_Mean; + std::unique_ptr m_Variance; + std::unique_ptr m_Beta; + std::unique_ptr m_Gamma; }; } //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.cpp b/src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.cpp new file mode 100644 index 0000000000..c4b78014b2 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.cpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "RefConvertFp16ToFp32Workload.hpp" +#include "Half.hpp" +#include "RefWorkloadUtils.hpp" +#include "FloatingPointConverter.hpp" + +namespace armnn +{ + +void RefConvertFp16ToFp32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp16ToFp32Workload_Execute"); + + const Half* const input = GetInputTensorDataHalf(0, m_Data); + float* const output = GetOutputTensorDataFloat(0, m_Data); + + unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); + armnnUtils::FloatingPointConverter::ConvertFloat16To32(input, numElements, output); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.hpp b/src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.hpp new file mode 100644 index 0000000000..34ae35545b --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefConvertFp16ToFp32Workload : public Float16ToFloat32Workload +{ +public: + using Float16ToFloat32Workload::Float16ToFloat32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.cpp b/src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.cpp new file mode 100644 index 0000000000..3c93297302 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.cpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefConvertFp32ToFp16Workload.hpp" + +#include "Half.hpp" +#include "FloatingPointConverter.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefConvertFp32ToFp16Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp32ToFp16Workload_Execute"); + + const float* const input = GetInputTensorDataFloat(0, m_Data); + Half* const output = GetOutputTensorDataHalf(0, m_Data); + + // convert Fp32 input to Fp16 output + unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); + armnnUtils::FloatingPointConverter::ConvertFloat32To16(input, numElements, output); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.hpp b/src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.hpp new file mode 100644 index 0000000000..903a50449f --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefConvertFp32ToFp16Workload : public Float32ToFloat16Workload +{ +public: + using Float32ToFloat16Workload::Float32ToFloat16Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp index 6e4cc69063..4fe823a288 100644 --- a/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp +++ b/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp @@ -12,6 +12,12 @@ namespace armnn { +RefConvolution2dFloat32Workload::RefConvolution2dFloat32Workload( + const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) + : Float32Workload(descriptor, info), + m_Weight(std::make_unique(*(descriptor.m_Weight))), + m_Bias(descriptor.m_Parameters.m_BiasEnabled + ? std::make_unique(*(descriptor.m_Bias)) : nullptr) {} void RefConvolution2dFloat32Workload::Execute() const { @@ -19,12 +25,13 @@ void RefConvolution2dFloat32Workload::Execute() const float* outputData = GetOutputTensorDataFloat(0, m_Data); const float* inputData = GetInputTensorDataFloat(0, m_Data); - const float* weightData = m_Data.m_Weight->template GetConstTensor(); + const float* weightData = m_Weight->template GetConstTensor(); const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? - m_Data.m_Bias->template GetConstTensor() : nullptr; + m_Bias->template GetConstTensor() : nullptr; + const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); ConvImpl( - m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0); + m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo); } } //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp index 514369c262..ecf0082f33 100644 --- a/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp +++ b/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp @@ -14,8 +14,14 @@ namespace armnn class RefConvolution2dFloat32Workload : public Float32Workload { public: - using Float32Workload::Float32Workload; + explicit RefConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); virtual void Execute() const override; + +private: + std::unique_ptr m_Weight; + std::unique_ptr m_Bias; + }; } //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp index f390baa387..19e9c2ed0a 100644 --- a/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp +++ b/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp @@ -12,6 +12,12 @@ namespace armnn { +RefConvolution2dUint8Workload::RefConvolution2dUint8Workload( + const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) + : Uint8Workload(descriptor, info), + m_Weight(std::make_unique(*(descriptor.m_Weight))), + m_Bias(descriptor.m_Parameters.m_BiasEnabled + ? 
std::make_unique(*(descriptor.m_Bias)) : nullptr) {} void RefConvolution2dUint8Workload::Execute() const { @@ -19,20 +25,21 @@ void RefConvolution2dUint8Workload::Execute() const const uint8_t* inputData = GetInputTensorDataU8(0, m_Data); const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const uint8_t* weightsData = m_Data.m_Weight->template GetConstTensor(); - const TensorInfo& weightsInfo = GetTensorInfo(m_Data.m_Weight); + const uint8_t* weightsData = m_Weight->template GetConstTensor(); + const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get()); const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? - m_Data.m_Bias->template GetConstTensor() : + m_Bias->template GetConstTensor() : nullptr; uint8_t* outputData = GetOutputTensorDataU8(0, m_Data); const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); ConvImpl( m_Data, inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(), biasData, - outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset()); + outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo); } } //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp index 954a206463..733d2052b2 100644 --- a/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp +++ b/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp @@ -14,8 +14,15 @@ namespace armnn class RefConvolution2dUint8Workload : public Uint8Workload { public: - using Uint8Workload::Uint8Workload; + explicit RefConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + std::unique_ptr m_Weight; + std::unique_ptr m_Bias; + }; } //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp index c631fecb66..f3167e299a 100644 --- a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp +++ b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp @@ -12,6 +12,12 @@ namespace armnn { +RefDepthwiseConvolution2dFloat32Workload::RefDepthwiseConvolution2dFloat32Workload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) + : Float32Workload(descriptor, info), + m_Weight(std::make_unique(*(descriptor.m_Weight))), + m_Bias(descriptor.m_Parameters.m_BiasEnabled + ? std::make_unique(*(descriptor.m_Bias)) : nullptr) {} void RefDepthwiseConvolution2dFloat32Workload::Execute() const { @@ -19,12 +25,13 @@ void RefDepthwiseConvolution2dFloat32Workload::Execute() const float* outputData = GetOutputTensorDataFloat(0, m_Data); const float* inputData = GetInputTensorDataFloat(0, m_Data); - const float* weightData = m_Data.m_Weight->template GetConstTensor(); + const float* weightData = m_Weight->template GetConstTensor(); const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? 
- m_Data.m_Bias->template GetConstTensor() : nullptr; + m_Bias->template GetConstTensor() : nullptr; + const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); ConvImpl - (m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, true); + (m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo, true); } } //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp index 34e6524684..042e7b3c0a 100644 --- a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp +++ b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp @@ -14,8 +14,14 @@ namespace armnn class RefDepthwiseConvolution2dFloat32Workload : public Float32Workload { public: - using Float32Workload::Float32Workload; + explicit RefDepthwiseConvolution2dFloat32Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + std::unique_ptr m_Weight; + std::unique_ptr m_Bias; }; } //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp index 5a8fb13112..fd5ade5559 100644 --- a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp +++ b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp @@ -13,26 +13,34 @@ namespace armnn { +RefDepthwiseConvolution2dUint8Workload::RefDepthwiseConvolution2dUint8Workload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) + : Uint8Workload(descriptor, info), + m_Weight(std::make_unique(*(descriptor.m_Weight))), + m_Bias(descriptor.m_Parameters.m_BiasEnabled + ? std::make_unique(*(descriptor.m_Bias)) : nullptr) {} + void RefDepthwiseConvolution2dUint8Workload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dUint8Workload_Execute"); const uint8_t* inputData = GetInputTensorDataU8(0, m_Data); const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const uint8_t* weightsData = m_Data.m_Weight->template GetConstTensor(); - const TensorInfo& weightsInfo = GetTensorInfo(m_Data.m_Weight); + const uint8_t* weightsData = m_Weight->template GetConstTensor(); + const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get()); const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? 
- m_Data.m_Bias->template GetConstTensor() : + m_Bias->template GetConstTensor() : nullptr; uint8_t* outputData = GetOutputTensorDataU8(0, m_Data); const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); ConvImpl( m_Data, inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(), biasData, - outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), true); + outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo, true); } } //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp index bd9945f529..2c8ed2d084 100644 --- a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp +++ b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp @@ -14,8 +14,13 @@ namespace armnn class RefDepthwiseConvolution2dUint8Workload : public Uint8Workload { public: - using Uint8Workload::Uint8Workload; + explicit RefDepthwiseConvolution2dUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); virtual void Execute() const override; + +private: + std::unique_ptr m_Weight; + std::unique_ptr m_Bias; }; } //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp index 6fe203e5f0..818455e0e9 100644 --- a/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp +++ b/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp @@ -12,6 +12,12 @@ namespace armnn { +RefFullyConnectedFloat32Workload::RefFullyConnectedFloat32Workload( + const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) + : Float32Workload(descriptor, info), + m_Weight(std::make_unique(*(descriptor.m_Weight))), + m_Bias(descriptor.m_Parameters.m_BiasEnabled + ? std::make_unique(*(descriptor.m_Bias)) : nullptr) {} void RefFullyConnectedFloat32Workload::Execute() const { @@ -22,8 +28,8 @@ void RefFullyConnectedFloat32Workload::Execute() const float* outputData = GetOutputTensorDataFloat(0, m_Data); const float* inputData = GetInputTensorDataFloat(0, m_Data); - const float* weightData = m_Data.m_Weight->GetConstTensor(); - const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Data.m_Bias->GetConstTensor() : nullptr; + const float* weightData = m_Weight->GetConstTensor(); + const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? 
m_Bias->GetConstTensor() : nullptr; FullyConnected(inputData, outputData, diff --git a/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp index cb835bd2ce..639d935a16 100644 --- a/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp +++ b/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp @@ -14,8 +14,13 @@ namespace armnn class RefFullyConnectedFloat32Workload : public Float32Workload { public: - using Float32Workload::Float32Workload; + explicit RefFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info); virtual void Execute() const override; + +private: + std::unique_ptr m_Weight; + std::unique_ptr m_Bias; }; } //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp index 0186d3f5e5..cd653657e1 100644 --- a/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp +++ b/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp @@ -14,6 +14,12 @@ namespace armnn { +RefFullyConnectedUint8Workload::RefFullyConnectedUint8Workload( + const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) + : Uint8Workload(descriptor, info), + m_Weight(std::make_unique(*(descriptor.m_Weight))), + m_Bias(descriptor.m_Parameters.m_BiasEnabled + ? std::make_unique(*(descriptor.m_Bias)) : nullptr) {} void RefFullyConnectedUint8Workload::Execute() const { @@ -22,18 +28,18 @@ void RefFullyConnectedUint8Workload::Execute() const const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); - const uint8_t* weightData = m_Data.m_Weight->GetConstTensor(); + const uint8_t* weightData = m_Weight->GetConstTensor(); auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo); - auto weight = Dequantize(weightData, m_Data.m_Weight->GetTensorInfo()); + auto weight = Dequantize(weightData, m_Weight->GetTensorInfo()); - std::vector results(inputInfo.GetNumElements()); + std::vector results(outputInfo.GetNumElements()); if (m_Data.m_Parameters.m_BiasEnabled) { - const int32_t* biasData = m_Data.m_Bias->GetConstTensor(); - auto bias = Dequantize(biasData, m_Data.m_Bias->GetTensorInfo()); + const int32_t* biasData = m_Bias->GetConstTensor(); + auto bias = Dequantize(biasData, m_Bias->GetTensorInfo()); FullyConnected(dequant.data(), results.data(), diff --git a/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp index cd14ea85e0..36e5f631ad 100644 --- a/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp +++ b/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp @@ -14,8 +14,13 @@ namespace armnn class RefFullyConnectedUint8Workload : public Uint8Workload { public: - using Uint8Workload::Uint8Workload; + explicit RefFullyConnectedUint8Workload(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info); virtual void Execute() const override; + +private: + std::unique_ptr m_Weight; + std::unique_ptr m_Bias; }; } //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.cpp new file mode 100644 index 0000000000..bc33638310 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.cpp @@ -0,0 +1,16 @@ +// 
+// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefLstmFloat32Workload.hpp" + +namespace armnn +{ + +void RefLstmFloat32Workload::Execute() const +{ + throw armnn::Exception("No implementation of Lstm in the Ref backend!"); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.hpp new file mode 100644 index 0000000000..0acce4d309 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefLstmFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp index c743207423..f4dff60ae4 100644 --- a/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp +++ b/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp @@ -17,7 +17,7 @@ namespace armnn { -// Helper function to compute "Within" normalization using Krichevsky 2012: Local Brightness Normalization +// Helper function to compute "Within" normalization using Krichevsky 2012: Local Brightness Normalization. static void NormalizeWithinUingLbr(const float* inputData, float* outputData, const TensorShape& tensorShape, @@ -80,7 +80,7 @@ static void NormalizeWithinUingLbr(const float* inputData, } } -// Helper function to compute "Across" normalization using Krichevsky 2012: Local Brightness Normalization +// Helper function to compute "Across" normalization using Krichevsky 2012: Local Brightness Normalization. 
void NormalizeAcrossUingLbr(const float* inputData, float* outputData, const TensorShape& tensorShape, diff --git a/src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp b/src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp index b2bb8fbf3d..93c883d826 100644 --- a/src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp +++ b/src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp @@ -7,6 +7,7 @@ #include "RefWorkloadUtils.hpp" #include +#include "TypeUtils.hpp" namespace armnn { diff --git a/src/armnn/backends/RefWorkloads/RefWorkloadUtils.hpp b/src/armnn/backends/RefWorkloads/RefWorkloadUtils.hpp index 088fe819e5..1df735ea55 100644 --- a/src/armnn/backends/RefWorkloads/RefWorkloadUtils.hpp +++ b/src/armnn/backends/RefWorkloads/RefWorkloadUtils.hpp @@ -9,6 +9,7 @@ #include #include +#include #include @@ -70,6 +71,18 @@ float* GetOutputTensorDataFloat(unsigned int idx, const PayloadType& data) return GetOutputTensorData(idx, data); } +template +const Half* GetInputTensorDataHalf(unsigned int idx, const PayloadType& data) +{ + return GetInputTensorData(idx, data); +} + +template +Half* GetOutputTensorDataHalf(unsigned int idx, const PayloadType& data) +{ + return GetOutputTensorData(idx, data); +} + //////////////////////////////////////////// /// u8 helpers //////////////////////////////////////////// diff --git a/src/armnn/backends/RefWorkloads/ResizeBilinear.cpp b/src/armnn/backends/RefWorkloads/ResizeBilinear.cpp index 7b386ed467..d8bca4be44 100644 --- a/src/armnn/backends/RefWorkloads/ResizeBilinear.cpp +++ b/src/armnn/backends/RefWorkloads/ResizeBilinear.cpp @@ -27,7 +27,7 @@ inline float Lerp(float a, float b, float w) void ResizeBilinear(const float* in, const TensorInfo& inputInfo, float* out, const TensorInfo& outputInfo) { - // We follow the definition of TensorFlow and AndroidNN: The top-left corner of a texel in the output + // We follow the definition of TensorFlow and AndroidNN: the top-left corner of a texel in the output // image is projected into the input image to figure out the interpolants and weights. Note that this // will yield different results than if projecting the centre of output texels. @@ -39,8 +39,8 @@ void ResizeBilinear(const float* in, const TensorInfo& inputInfo, float* out, co const unsigned int outputHeight = outputInfo.GetShape()[2]; const unsigned int outputWidth = outputInfo.GetShape()[3]; - // How much to scale pixel coordinates in the output image to get the corresponding pixel coordinates - // in the input image + // How much to scale pixel coordinates in the output image, to get the corresponding pixel coordinates + // in the input image. const float scaleY = boost::numeric_cast(inputHeight) / boost::numeric_cast(outputHeight); const float scaleX = boost::numeric_cast(inputWidth) / boost::numeric_cast(outputWidth); @@ -53,33 +53,33 @@ void ResizeBilinear(const float* in, const TensorInfo& inputInfo, float* out, co { for (unsigned int y = 0; y < outputHeight; ++y) { - // Corresponding real-valued height coordinate in input image + // Corresponding real-valued height coordinate in input image. const float iy = boost::numeric_cast(y) * scaleY; - // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation) + // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation). const float fiy = floorf(iy); const unsigned int y0 = boost::numeric_cast(fiy); - // Interpolation weight (range [0,1]) + // Interpolation weight (range [0,1]). 
const float yw = iy - fiy; for (unsigned int x = 0; x < outputWidth; ++x) { - // Real-valued and discrete width coordinates in input image + // Real-valued and discrete width coordinates in input image. const float ix = boost::numeric_cast(x) * scaleX; const float fix = floorf(ix); const unsigned int x0 = boost::numeric_cast(fix); - // Interpolation weight (range [0,1]) + // Interpolation weight (range [0,1]). const float xw = ix - fix; - // Discrete width/height coordinates of texels below and to the right of (x0, y0) + // Discrete width/height coordinates of texels below and to the right of (x0, y0). const unsigned int x1 = std::min(x0 + 1, inputWidth - 1u); const unsigned int y1 = std::min(y0 + 1, inputHeight - 1u); // Interpolation - const float ly0 = Lerp(input.Get(n, c, y0, x0), input.Get(n, c, y0, x1), xw); // lerp along row y0 - const float ly1 = Lerp(input.Get(n, c, y1, x0), input.Get(n, c, y1, x1), xw); // lerp along row y1 + const float ly0 = Lerp(input.Get(n, c, y0, x0), input.Get(n, c, y0, x1), xw); // lerp along row y0. + const float ly1 = Lerp(input.Get(n, c, y1, x0), input.Get(n, c, y1, x1), xw); // lerp along row y1. const float l = Lerp(ly0, ly1, yw); output.Get(n, c, y, x) = l; diff --git a/src/armnn/backends/RefWorkloads/Softmax.cpp b/src/armnn/backends/RefWorkloads/Softmax.cpp index 58840e3076..c9f0bc5e59 100644 --- a/src/armnn/backends/RefWorkloads/Softmax.cpp +++ b/src/armnn/backends/RefWorkloads/Softmax.cpp @@ -11,13 +11,13 @@ namespace armnn { -/// Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo +/// Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo. void Softmax(const float* in, float* out, const TensorInfo& tensorInfo, float beta) { unsigned int numChannels = tensorInfo.GetShape()[1]; for (unsigned int n = 0; n < tensorInfo.GetShape()[0]; n++) { - // find maximum channel + // Find maximum channel. float max = in[n * numChannels]; for (unsigned int c = 1; c < numChannels; c++) { @@ -28,7 +28,7 @@ void Softmax(const float* in, float* out, const TensorInfo& tensorInfo, float be } } - // exponentiate all values and sum + // Exponentiate all values and sum. std::vector exponentials(numChannels); float sum = 0.0f; for (unsigned int c = 0; c < numChannels; c++) @@ -38,7 +38,7 @@ void Softmax(const float* in, float* out, const TensorInfo& tensorInfo, float be sum += exponentials[c]; } - // divide exponentials by sum to give outputs + // Divide exponentials by sum to give outputs. for (unsigned int c = 0; c < numChannels; c++) { out[n * numChannels + c] = exponentials[c] / sum; diff --git a/src/armnn/backends/RefWorkloads/Softmax.hpp b/src/armnn/backends/RefWorkloads/Softmax.hpp index c508ab2b82..f75388dc2b 100644 --- a/src/armnn/backends/RefWorkloads/Softmax.hpp +++ b/src/armnn/backends/RefWorkloads/Softmax.hpp @@ -10,7 +10,7 @@ namespace armnn { -/// Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo +/// Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo. 
void Softmax(const float* in, float* out, const TensorInfo& tensorInfo, float beta); } //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Splitter.hpp b/src/armnn/backends/RefWorkloads/Splitter.hpp index bd5da6cfe2..c12d9368bf 100644 --- a/src/armnn/backends/RefWorkloads/Splitter.hpp +++ b/src/armnn/backends/RefWorkloads/Splitter.hpp @@ -31,7 +31,7 @@ void Splitter(const SplitterQueueDescriptor& data) for (unsigned int i = 0; i(viewIdx, data); BOOST_ASSERT(outputData); diff --git a/src/armnn/backends/RefWorkloads/TensorBufferArrayView.hpp b/src/armnn/backends/RefWorkloads/TensorBufferArrayView.hpp index 3994c1f1de..ad0f38e867 100644 --- a/src/armnn/backends/RefWorkloads/TensorBufferArrayView.hpp +++ b/src/armnn/backends/RefWorkloads/TensorBufferArrayView.hpp @@ -10,7 +10,7 @@ namespace armnn { -// Utility class providing access to raw tensor memory based on indices along each dimension +// Utility class providing access to raw tensor memory based on indices along each dimension. template class TensorBufferArrayView { -- cgit v1.2.1
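
The recurring change in this patch is that each Ref workload now deep-copies its constant tensors (weights, biases, batch-normalization parameters) into ScopedCpuTensorHandle members at construction time and reads from those copies in Execute(), instead of dereferencing the queue descriptor's pointers during execution. A minimal, self-contained C++ sketch of that ownership pattern follows; MiniTensor, MiniDescriptor and MiniWorkload are simplified stand-ins invented for illustration and are not Arm NN's actual ScopedCpuTensorHandle / QueueDescriptor API.

// Sketch of the "copy constants at construction" pattern used throughout this
// patch. The types below are hypothetical stand-ins, not the real Arm NN types.
#include <cstdio>
#include <memory>
#include <vector>

struct MiniTensor               // stand-in for a constant tensor handle
{
    std::vector<float> data;
};

struct MiniDescriptor           // stand-in for a workload queue descriptor
{
    const MiniTensor* weight = nullptr;  // non-owning pointer supplied by the caller
    const MiniTensor* bias   = nullptr;  // optional, only valid when biasEnabled
    bool biasEnabled = false;
};

class MiniWorkload
{
public:
    explicit MiniWorkload(const MiniDescriptor& desc)
        // Deep-copy the constants now, so Execute() never touches desc again.
        : m_Weight(std::make_unique<MiniTensor>(*desc.weight))
        , m_Bias(desc.biasEnabled ? std::make_unique<MiniTensor>(*desc.bias) : nullptr)
    {
    }

    void Execute() const
    {
        // Read only the copies owned by this workload.
        const MiniTensor& weights = *m_Weight;
        const MiniTensor* bias = m_Bias.get();
        std::printf("weights: %zu values, bias %s\n",
                    weights.data.size(), bias ? "present" : "absent");
    }

private:
    std::unique_ptr<MiniTensor> m_Weight;
    std::unique_ptr<MiniTensor> m_Bias;
};

int main()
{
    MiniTensor weights{{0.1f, 0.2f, 0.3f}};

    MiniDescriptor desc;
    desc.weight = &weights;
    desc.biasEnabled = false;

    MiniWorkload workload(desc);  // constants are copied here
    workload.Execute();           // no dependency on desc or its pointers
    return 0;
}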