From 38e05bd2836b1b65b440330a9c283038ba4192c3 Mon Sep 17 00:00:00 2001
From: Jan Eilers
Date: Wed, 26 Jun 2019 13:10:09 +0100
Subject: IVGCVSW-3236 Extend Ref LSTM with layer normalization support

* Add descriptor values
* Update lstm queue descriptor validate function
* Update lstm workload
* Update isLstmSupported (Cl and Ref), LayerSupportBase, ILayerSupport
* Update lstm layer
* Add unit tests

Signed-off-by: Jan Eilers
Change-Id: I932175d550facfb342325051eaa7bd2084ebdc18
Signed-off-by: Jan Eilers
---
 src/backends/reference/workloads/LstmUtils.hpp | 204 +++++--------------------
 1 file changed, 37 insertions(+), 167 deletions(-)

(limited to 'src/backends/reference/workloads/LstmUtils.hpp')

diff --git a/src/backends/reference/workloads/LstmUtils.hpp b/src/backends/reference/workloads/LstmUtils.hpp
index db02a84a45..f6aff8b69f 100644
--- a/src/backends/reference/workloads/LstmUtils.hpp
+++ b/src/backends/reference/workloads/LstmUtils.hpp
@@ -8,211 +8,81 @@
 #include "BaseIterator.hpp"
 #include <backendsCommon/CpuTensorHandle.hpp>
 
-namespace
-{
-
 // Helper functions ported from the Android code base
 // Refer to: android/external/tensorflow/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc
 
+
+void VectorBatchVectorAdd(armnn::Decoder<float>& vector,
+                          uint32_t vSize,
+                          armnn::Decoder<float>& batchVector,
+                          uint32_t nBatch,
+                          armnn::Encoder<float>& outResult );
+
+// Layer norm for each batch.
+// normalization_epsilon is added to avoid divergence.
+void MeanStddevNormalization(armnn::Decoder<float>& input_vector,
+                             armnn::Encoder<float>& output_vector,
+                             uint32_t v_size,
+                             uint32_t n_batch,
+                             float normalization_epsilon);
+
+void ZeroVector(armnn::Encoder<float>& vector,
+                uint32_t vSize);
+
 void MatrixBatchVectorMultiplyAccumulate(armnn::Decoder<float>& matrix,
                                          uint32_t mRows,
                                          uint32_t mCols,
                                          armnn::Decoder<float>& vector,
                                          uint32_t nBatch,
-                                         armnn::Encoder<float>& outResult)
-{
-    for (uint32_t b = 0; b < nBatch; b++)
-    {
-        for (uint32_t r = 0; r < mRows; r++)
-        {
-            vector += b * mCols;
-            for (uint32_t c = 0; c < mCols; c++)
-            {
-                outResult.Set(outResult.Get() + matrix.Get() * vector.Get());
-                ++matrix;
-                ++vector;
-            }
-            outResult += 1;
-            vector -= (b+1) * mCols;
-        }
-        matrix -= (mRows * mCols);
-    }
-    outResult -= (mRows * nBatch);
-}
+                                         armnn::Encoder<float>& outResult);
 
 void VectorBatchVectorAssign(armnn::Decoder<float>& vector,
                              uint32_t vSize,
                              uint32_t nBatch,
-                             armnn::Encoder<float>& outBatchVector)
-{
-    for (uint32_t b = 0; b < nBatch; b++)
-    {
-        for (uint32_t v = 0; v < vSize; v++)
-        {
-            outBatchVector.Set(vector.Get());
-            ++outBatchVector;
-            ++vector;
-        }
-        vector -= vSize;
-    }
-    outBatchVector -= (nBatch * vSize);
-}
+                             armnn::Encoder<float>& outBatchVector);
 
 void VectorBatchVectorCwiseProductAccumulate(armnn::Decoder<float>& vector,
                                              uint32_t vSize,
                                              armnn::Decoder<float>& batchVector,
                                              uint32_t nBatch,
-                                             armnn::Encoder<float>& outResult)
-{
-    for (uint32_t b = 0; b < nBatch; b++)
-    {
-        for (uint32_t v = 0; v < vSize; v++)
-        {
-            outResult.Set(outResult.Get() + vector.Get() * batchVector.Get());
-            ++outResult;
-            ++vector;
-            ++batchVector;
-        }
-        vector -= vSize;
-    }
-    batchVector -= vSize * nBatch;
-    outResult -= vSize * nBatch;
-}
+                                             armnn::Encoder<float>& outResult);
+
+void VectorBatchVectorCwiseProduct(armnn::Decoder<float>& vector,
+                                   uint32_t vSize,
+                                   armnn::Decoder<float>& batchVector,
+                                   uint32_t nBatch,
+                                   armnn::Encoder<float>& outResult);
 
 void Sub1Vector(armnn::Decoder<float>& vector,
                 uint32_t vSize,
-                armnn::Encoder<float>& result)
-{
-    for (uint32_t v = 0; v < vSize; v++)
-    {
-        result.Set(1.0f - vector.Get());
-        ++vector;
-        ++result;
-    }
-    vector -= vSize;
-    result -= vSize;
-}
+                armnn::Encoder<float>& result);
+
 
 void VectorVectorCwiseProduct(armnn::Decoder<float>& vector1,
                               armnn::Decoder<float>& vector2,
                               uint32_t vSize,
-                              armnn::Encoder<float>& outResult)
-{
-    for (uint32_t v = 0; v < vSize; v++)
-    {
-        outResult.Set(vector1.Get() * vector2.Get());
-        ++outResult;
-        ++vector1;
-        ++vector2;
-    }
-    outResult -= vSize;
-    vector1 -= vSize;
-    vector2 -= vSize;
-}
+                              armnn::Encoder<float>& outResult);
 
 void VectorVectorCwiseProductAccumulate(armnn::Decoder<float>& vector1,
                                         armnn::Decoder<float>& vector2,
                                         uint32_t vSize,
-                                        armnn::Encoder<float>& outResult)
-{
-    for (uint32_t v = 0; v < vSize; v++)
-    {
-        outResult.Set(outResult.Get() + vector1.Get() * vector2.Get());
-        ++outResult;
-        ++vector1;
-        ++vector2;
-    }
-    outResult -= vSize;
-    vector1 -= vSize;
-    vector2 -= vSize;
-}
+                                        armnn::Encoder<float>& outResult);
 
 float Clip(float f,
-           float absLimit)
-{
-    float result = (absLimit < f) ? absLimit : f;
-    result = (-absLimit > result) ? -absLimit : result;
-    return result;
-}
+           float absLimit);
 
 void ClipVector(armnn::Decoder<float>& vector,
                 uint32_t vSize,
                 float absLimit,
-                armnn::Encoder<float>& outResult)
-{
-    for (uint32_t v = 0; v < vSize; v++)
-    {
-        outResult.Set(Clip(vector.Get(), absLimit));
-        ++vector;
-        ++outResult;
-    }
-    vector -= vSize;
-    outResult -= vSize;
-}
+                armnn::Encoder<float>& outResult);
 
 void CopyVector(armnn::Decoder<float>& vector,
                 uint32_t vSize,
-                armnn::Encoder<float>& outResult)
-{
-    for (uint32_t v = 0; v < vSize; v++)
-    {
-        outResult.Set(vector.Get());
-        ++outResult;
-        ++vector;
-    }
-    outResult -= vSize;
-    vector -= vSize;
-}
+                armnn::Encoder<float>& outResult);
 
 void SetActivationParameters(uint32_t activation,
                              armnn::ActivationFunction& outArmnnActivation,
                              float& outA,
-                             float& outB)
-{
-    switch (activation)
-    {
-    case 0: // None
-        outA = 0;
-        outB = 0;
-        return;
-
-    case 1: // Relu
-        outArmnnActivation = armnn::ActivationFunction::ReLu;
-        outA = 0;
-        outB = 0;
-        return;
-
-    case 3: // Relu6
-        outArmnnActivation = armnn::ActivationFunction::BoundedReLu;
-        outA = 6;
-        outB = 0;
-        return;
-
-    case 4: // Tanh
-        outArmnnActivation = armnn::ActivationFunction::TanH;
-        outA = 1;
-        outB = 1;
-        return;
-
-    case 6: // Sigmoid
-        outArmnnActivation = armnn::ActivationFunction::Sigmoid;
-        outA = 0;
-        outB = 0;
-        return;
-
-    default:
-        throw armnn::Exception("Unsupported activation function: " + std::to_string(activation));
-    }
-}
-
-std::unique_ptr<armnn::ScopedCpuTensorHandle> AssignScopedCpuTensorHandle(const armnn::ConstCpuTensorHandle* ptr)
-{
-    if (!ptr)
-    {
-        return nullptr;
-    }
-
-    return std::make_unique<armnn::ScopedCpuTensorHandle>(*ptr);
-}
-
-} // anonymous namespace
+                             float& outB);
+
+std::unique_ptr<armnn::ScopedCpuTensorHandle> AssignScopedCpuTensorHandle(const armnn::ConstCpuTensorHandle* ptr);
--
cgit v1.2.1
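Note on the new MeanStddevNormalization declaration: the header comment only states that it performs layer normalization per batch and that normalization_epsilon guards against divergence. Below is a minimal standalone sketch of that computation, written against plain float buffers instead of armnn::Decoder/Encoder so it compiles on its own. The function name MeanStddevNormalizationSketch and the exact epsilon handling are assumptions modelled on the TFLite portable_tensor_utils reference cited in the header; this is not the implementation added by this patch.

// Hypothetical sketch only - not the code added by this patch.
#include <cmath>
#include <cstdint>
#include <vector>

// output must already be sized nBatch * vSize.
void MeanStddevNormalizationSketch(const std::vector<float>& input,
                                   std::vector<float>& output,
                                   uint32_t vSize,
                                   uint32_t nBatch,
                                   float normalizationEpsilon)
{
    for (uint32_t b = 0; b < nBatch; ++b)
    {
        const float* in  = input.data()  + b * vSize;
        float*       out = output.data() + b * vSize;

        // Mean and mean-of-squares over one batch of vSize elements.
        float sum   = 0.0f;
        float sumSq = 0.0f;
        for (uint32_t v = 0; v < vSize; ++v)
        {
            sum   += in[v];
            sumSq += in[v] * in[v];
        }
        const float mean     = sum / static_cast<float>(vSize);
        const float variance = sumSq / static_cast<float>(vSize) - mean * mean;

        // The epsilon keeps the division finite when a batch is constant
        // (variance == 0), which is what "avoid divergence" refers to.
        const float invStddev = 1.0f / std::sqrt(variance > 0.0f ? variance
                                                                 : normalizationEpsilon);
        for (uint32_t v = 0; v < vSize; ++v)
        {
            out[v] = (in[v] - mean) * invStddev;
        }
    }
}

With, for example, vSize = 4, nBatch = 2 and an epsilon of 1e-8f, each group of four output values comes out zero-mean with unit standard deviation, batch by batch.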