From 38e05bd2836b1b65b440330a9c283038ba4192c3 Mon Sep 17 00:00:00 2001
From: Jan Eilers
Date: Wed, 26 Jun 2019 13:10:09 +0100
Subject: IVGCVSW-3236 Extend Ref LSTM with layer normalization support

* Add descriptor values
* Update lstm queue descriptor validate function
* Update lstm workload
* Update isLstmSupported (Cl and Ref), LayerSupportBase, ILayerSupport
* Update lstm layer
* Add unit tests

Signed-off-by: Jan Eilers
Change-Id: I932175d550facfb342325051eaa7bd2084ebdc18
Signed-off-by: Jan Eilers
---
 src/backends/reference/workloads/LstmUtils.cpp | 307 +++++++++++++++++++++++++
 1 file changed, 307 insertions(+)
 create mode 100644 src/backends/reference/workloads/LstmUtils.cpp

diff --git a/src/backends/reference/workloads/LstmUtils.cpp b/src/backends/reference/workloads/LstmUtils.cpp
new file mode 100644
index 0000000000..f197aae291
--- /dev/null
+++ b/src/backends/reference/workloads/LstmUtils.cpp
@@ -0,0 +1,307 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+//#pragma once
+
+#include "LstmUtils.hpp"
+#include "BaseIterator.hpp"
+#include <backendsCommon/CpuTensorHandle.hpp>
+
+
+// Helper functions ported from the Android code base
+// Refer to: android/external/tensorflow/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc
+
+void VectorBatchVectorAdd(armnn::Decoder<float>& vector,
+                          uint32_t vSize,
+                          armnn::Decoder<float>& batchVector,
+                          uint32_t nBatch,
+                          armnn::Encoder<float>& outResult )
+{
+    for (uint32_t b = 0; b < nBatch; b++)
+    {
+        for (uint32_t v = 0; v < vSize; v++)
+        {
+            outResult.Set(batchVector.Get() + vector.Get());
+            ++outResult;
+            ++vector;
+            ++batchVector;
+        }
+        vector -= vSize;
+    }
+    batchVector -= vSize * nBatch;
+    outResult -= vSize * nBatch;
+}
+
+
+// Layer norm for each batch.
+// normalization_epsilon is added to avoid divergence.
+void MeanStddevNormalization(armnn::Decoder<float>& input_vector,
+                             armnn::Encoder<float>& output_vector,
+                             uint32_t v_size,
+                             uint32_t n_batch,
+                             float normalization_epsilon)
+{
+    for (uint32_t batch = 0; batch < n_batch; ++batch) {
+        float sum = 0.0f;
+        float sum_sq = 0.0f;
+        for (uint32_t i = 0; i < v_size; ++i) {
+            sum += input_vector.Get();
+            sum_sq += input_vector.Get() * input_vector.Get();
+            ++input_vector;
+        }
+        input_vector -= v_size;
+
+        const float mean = sum / static_cast<float>(v_size);
+        float stddev_inv = 0.0f;
+        const float variance = sum_sq / static_cast<float>(v_size) - mean * mean;
+        if (variance == 0) {
+            stddev_inv = 1.0f / std::sqrt(normalization_epsilon);
+        } else {
+            stddev_inv = 1.0f / std::sqrt(variance);
+        }
+
+        for (uint32_t i = 0; i < v_size; ++i) {
+            output_vector.Set((input_vector.Get() - mean) * stddev_inv);
+            ++output_vector;
+            ++input_vector;
+        }
+        // Don't reset iterator to handle next batch
+    }
+    output_vector -= v_size * n_batch;
+    input_vector -= v_size * n_batch;
+}
+
+void ZeroVector(armnn::Encoder<float>& vector,
+                uint32_t vSize)
+{
+    for (uint32_t v = 0; v < vSize; v++)
+    {
+        vector.Set(0.0f);
+        ++vector;
+    }
+    vector -= vSize;
+}
+
+void MatrixBatchVectorMultiplyAccumulate(armnn::Decoder<float>& matrix,
+                                         uint32_t mRows,
+                                         uint32_t mCols,
+                                         armnn::Decoder<float>& vector,
+                                         uint32_t nBatch,
+                                         armnn::Encoder<float>& outResult)
+{
+    for (uint32_t b = 0; b < nBatch; b++)
+    {
+        for (uint32_t r = 0; r < mRows; r++)
+        {
+            vector += b * mCols;
+            for (uint32_t c = 0; c < mCols; c++)
+            {
+                outResult.Set(outResult.Get() + matrix.Get() * vector.Get());
+                ++matrix;
+                ++vector;
+            }
+            outResult += 1;
+            vector -= (b+1) * mCols;
+        }
+        matrix -= (mRows * mCols);
+    }
+    outResult -= (mRows * nBatch);
+}
+
+void VectorBatchVectorAssign(armnn::Decoder<float>& vector,
+                             uint32_t vSize,
+                             uint32_t nBatch,
+                             armnn::Encoder<float>& outBatchVector)
+{
+    for (uint32_t b = 0; b < nBatch; b++)
+    {
+        for (uint32_t v = 0; v < vSize; v++)
+        {
+            outBatchVector.Set(vector.Get());
+            ++outBatchVector;
+            ++vector;
+        }
+        vector -= vSize;
+    }
+    outBatchVector -= (nBatch * vSize);
+}
+
+void VectorBatchVectorCwiseProductAccumulate(armnn::Decoder<float>& vector,
+                                             uint32_t vSize,
+                                             armnn::Decoder<float>& batchVector,
+                                             uint32_t nBatch,
+                                             armnn::Encoder<float>& outResult)
+{
+    for (uint32_t b = 0; b < nBatch; b++)
+    {
+        for (uint32_t v = 0; v < vSize; v++)
+        {
+            outResult.Set(outResult.Get() + vector.Get() * batchVector.Get());
+            ++outResult;
+            ++vector;
+            ++batchVector;
+        }
+        vector -= vSize;
+    }
+    batchVector -= vSize * nBatch;
+    outResult -= vSize * nBatch;
+}
+
+void VectorBatchVectorCwiseProduct(armnn::Decoder<float>& vector,
+                                   uint32_t vSize,
+                                   armnn::Decoder<float>& batchVector,
+                                   uint32_t nBatch,
+                                   armnn::Encoder<float>& outResult)
+{
+    for (uint32_t b = 0; b < nBatch; b++)
+    {
+        for (uint32_t v = 0; v < vSize; v++)
+        {
+            outResult.Set(vector.Get() * batchVector.Get());
+            ++outResult;
+            ++vector;
+            ++batchVector;
+        }
+        vector -= vSize;
+    }
+    batchVector -= vSize * nBatch;
+    outResult -= vSize * nBatch;
+}
+
+void Sub1Vector(armnn::Decoder<float>& vector,
+                uint32_t vSize,
+                armnn::Encoder<float>& result)
+{
+    for (uint32_t v = 0; v < vSize; v++)
+    {
+        result.Set(1.0f - vector.Get());
+        ++vector;
+        ++result;
+    }
+    vector -= vSize;
+    result -= vSize;
+}
+
+void VectorVectorCwiseProduct(armnn::Decoder<float>& vector1,
+                              armnn::Decoder<float>& vector2,
+                              uint32_t vSize,
+                              armnn::Encoder<float>& outResult)
+{
+    for (uint32_t v = 0; v < vSize; v++)
+    {
+        outResult.Set(vector1.Get() * vector2.Get());
+        ++outResult;
+        ++vector1;
+        ++vector2;
+    }
+    outResult -= vSize;
+    vector1 -= vSize;
+    vector2 -= vSize;
+}
+
+void VectorVectorCwiseProductAccumulate(armnn::Decoder<float>& vector1,
+                                        armnn::Decoder<float>& vector2,
+                                        uint32_t vSize,
+                                        armnn::Encoder<float>& outResult)
+{
+    for (uint32_t v = 0; v < vSize; v++)
+    {
+        outResult.Set(outResult.Get() + vector1.Get() * vector2.Get());
+        ++outResult;
+        ++vector1;
+        ++vector2;
+    }
+    outResult -= vSize;
+    vector1 -= vSize;
+    vector2 -= vSize;
+}
+
+float Clip(float f,
+           float absLimit)
+{
+    float result = (absLimit < f) ? absLimit : f;
+    result = (-absLimit > result) ? -absLimit : result;
+    return result;
+}
+
+void ClipVector(armnn::Decoder<float>& vector,
+                uint32_t vSize,
+                float absLimit,
+                armnn::Encoder<float>& outResult)
+{
+    for (uint32_t v = 0; v < vSize; v++)
+    {
+        outResult.Set(Clip(vector.Get(), absLimit));
+        ++vector;
+        ++outResult;
+    }
+    vector -= vSize;
+    outResult -= vSize;
+}
+
+void CopyVector(armnn::Decoder<float>& vector,
+                uint32_t vSize,
+                armnn::Encoder<float>& outResult)
+{
+    for (uint32_t v = 0; v < vSize; v++)
+    {
+        outResult.Set(vector.Get());
+        ++outResult;
+        ++vector;
+    }
+    outResult -= vSize;
+    vector -= vSize;
+}
+
+void SetActivationParameters(uint32_t activation,
+                             armnn::ActivationFunction& outArmnnActivation,
+                             float& outA,
+                             float& outB)
+{
+    switch (activation)
+    {
+    case 0: // None
+        outA = 0;
+        outB = 0;
+        return;
+
+    case 1: // Relu
+        outArmnnActivation = armnn::ActivationFunction::ReLu;
+        outA = 0;
+        outB = 0;
+        return;
+
+    case 3: // Relu6
+        outArmnnActivation = armnn::ActivationFunction::BoundedReLu;
+        outA = 6;
+        outB = 0;
+        return;
+
+    case 4: // Tanh
+        outArmnnActivation = armnn::ActivationFunction::TanH;
+        outA = 1;
+        outB = 1;
+        return;
+
+    case 6: // Sigmoid
+        outArmnnActivation = armnn::ActivationFunction::Sigmoid;
+        outA = 0;
+        outB = 0;
+        return;
+
+    default:
+        throw armnn::Exception("Unsupported activation function: " + std::to_string(activation));
+    }
+}
+
+std::unique_ptr<armnn::ScopedCpuTensorHandle> AssignScopedCpuTensorHandle(const armnn::ConstCpuTensorHandle* ptr)
+{
+    if (!ptr)
+    {
+        return nullptr;
+    }
+
+    return std::make_unique<armnn::ScopedCpuTensorHandle>(*ptr);
+}
--
cgit v1.2.1
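For reference, the following is a minimal standalone sketch of the arithmetic that the MeanStddevNormalization helper in this patch implements: each batch row of v_size elements is shifted to zero mean and scaled by the inverse standard deviation, and normalization_epsilon is only substituted when the variance is exactly zero, matching the ported TFLite reference code. The function name MeanStddevNormalizationRef, the flat buffer layout, and the sample values are invented for this illustration and are not part of the patch; the real workload drives the same loop through armnn::Decoder<float>/armnn::Encoder<float> iterators.

// Standalone illustration only, not part of the patch above.
#include <cmath>
#include <cstdio>
#include <vector>

// Per-batch layer normalisation on flat float buffers: nBatch rows of
// vSize elements each, normalised independently to zero mean and unit
// variance. epsilon is only used when the variance is exactly zero.
void MeanStddevNormalizationRef(const std::vector<float>& input,
                                std::vector<float>& output,
                                unsigned int vSize,
                                unsigned int nBatch,
                                float epsilon = 1e-8f)
{
    for (unsigned int b = 0; b < nBatch; ++b)
    {
        const float* in  = input.data()  + b * vSize;
        float*       out = output.data() + b * vSize;

        float sum   = 0.0f;
        float sumSq = 0.0f;
        for (unsigned int i = 0; i < vSize; ++i)
        {
            sum   += in[i];
            sumSq += in[i] * in[i];
        }

        const float mean      = sum / static_cast<float>(vSize);
        const float variance  = sumSq / static_cast<float>(vSize) - mean * mean;
        const float stddevInv = (variance == 0.0f) ? 1.0f / std::sqrt(epsilon)
                                                   : 1.0f / std::sqrt(variance);

        for (unsigned int i = 0; i < vSize; ++i)
        {
            out[i] = (in[i] - mean) * stddevInv;
        }
    }
}

int main()
{
    // Two batches of four values; the second row is constant, so the
    // zero-variance (epsilon) branch is exercised and yields all zeros.
    std::vector<float> input  = { 1.0f, 2.0f, 3.0f, 4.0f,
                                  5.0f, 5.0f, 5.0f, 5.0f };
    std::vector<float> output(input.size());

    MeanStddevNormalizationRef(input, output, 4u, 2u);

    for (float v : output)
    {
        std::printf("%f\n", v);
    }
    return 0;
}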