Diffstat (limited to 'src/armnn/backends')
313 files changed, 0 insertions, 32434 deletions
diff --git a/src/armnn/backends/ArmComputeTensorUtils.cpp b/src/armnn/backends/ArmComputeTensorUtils.cpp deleted file mode 100644 index ba9fb40cfc..0000000000 --- a/src/armnn/backends/ArmComputeTensorUtils.cpp +++ /dev/null @@ -1,135 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#include "ArmComputeTensorUtils.hpp" -#include "ArmComputeUtils.hpp" - -#include <armnn/Descriptors.hpp> - -namespace armnn -{ -namespace armcomputetensorutils -{ - -arm_compute::DataType GetArmComputeDataType(armnn::DataType dataType) -{ - switch(dataType) - { - case armnn::DataType::Float16: - return arm_compute::DataType::F16; - case armnn::DataType::Float32: - return arm_compute::DataType::F32; - case armnn::DataType::QuantisedAsymm8: - return arm_compute::DataType::QASYMM8; - case armnn::DataType::Signed32: - return arm_compute::DataType::S32; - default: - BOOST_ASSERT_MSG(false, "Unknown data type"); - return arm_compute::DataType::UNKNOWN; - } -} - -arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape) -{ - arm_compute::TensorShape shape; - - // armnn tensors are (batch, channels, height, width). - // arm_compute tensors are (width, height, channels, batch). - for (unsigned int i = 0; i < tensorShape.GetNumDimensions(); i++) - { - // Note that our dimensions are stored in the opposite order to ACL's. - shape.set(tensorShape.GetNumDimensions() - i - 1, tensorShape[i]); - - // TensorShape::set() flattens leading ones, so that batch size 1 cannot happen. - // arm_compute tensors expect this. - } - - // prevent arm_compute issue where tensor is flattened to nothing - if (shape.num_dimensions() == 0) - { - shape.set_num_dimensions(1); - } - - return shape; -} - -// Utility function used to build a TensorInfo object, that can be used to initialise -// ARM Compute Tensor and CLTensor allocators. -arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo) -{ - const arm_compute::TensorShape aclTensorShape = BuildArmComputeTensorShape(tensorInfo.GetShape()); - const arm_compute::DataType aclDataType = GetArmComputeDataType(tensorInfo.GetDataType()); - const arm_compute::QuantizationInfo aclQuantizationInfo(tensorInfo.GetQuantizationScale(), - tensorInfo.GetQuantizationOffset()); - - return arm_compute::TensorInfo(aclTensorShape, 1, aclDataType, aclQuantizationInfo); -} - -arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor) -{ - using arm_compute::PoolingType; - using arm_compute::DimensionRoundingType; - using arm_compute::PadStrideInfo; - using arm_compute::PoolingLayerInfo; - using arm_compute::Size2D; - - // Resolve ARM Compute layer parameters. 
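For reference, the dimension handling in the deleted BuildArmComputeTensorShape above is easy to misread: ArmNN orders dimensions as (batch, channels, height, width), while ACL stores the same shape mirrored as (width, height, channels, batch). A minimal standalone sketch of that reversal, using hypothetical names that are not part of the deleted file:

    #include <cstdio>
    #include <vector>

    // Sketch of the reversal performed by BuildArmComputeTensorShape: each
    // ArmNN dimension is written into the mirrored ACL position.
    std::vector<unsigned int> ReverseToAclOrder(const std::vector<unsigned int>& armnnShape)
    {
        std::vector<unsigned int> aclShape(armnnShape.size());
        for (std::size_t i = 0; i < armnnShape.size(); ++i)
        {
            aclShape[armnnShape.size() - i - 1] = armnnShape[i];
        }
        return aclShape;
    }

    int main()
    {
        // ArmNN shape {1, 3, 224, 224} (N, C, H, W) becomes ACL (224, 224, 3, 1).
        for (unsigned int d : ReverseToAclOrder({1, 3, 224, 224}))
        {
            std::printf("%u ", d);
        }
        std::printf("\n");
        return 0;
    }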
- const PoolingType poolingType = ConvertPoolingAlgorithmToAclPoolingType(descriptor.m_PoolType); - - bool isGlobalPooling = (descriptor.m_StrideX==0 && descriptor.m_StrideY==0); - //use specific constructor if global pooling - if(isGlobalPooling) - { - return arm_compute::PoolingLayerInfo(poolingType); - } - - const DimensionRoundingType rounding = ConvertOutputShapeRoundingToAclDimensionRoundingType( - descriptor.m_OutputShapeRounding); - const PadStrideInfo padStrideInfo(descriptor.m_StrideX, - descriptor.m_StrideY, - descriptor.m_PadLeft, - descriptor.m_PadRight, - descriptor.m_PadTop, - descriptor.m_PadBottom, - rounding); - - const bool excludePadding = (descriptor.m_PaddingMethod == PaddingMethod::Exclude); - - const Size2D poolSize(descriptor.m_PoolWidth, descriptor.m_PoolHeight); - - return arm_compute::PoolingLayerInfo(poolingType, poolSize, padStrideInfo, excludePadding); -} - -arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const NormalizationDescriptor& descriptor) -{ - const arm_compute::NormType normType = - ConvertNormalizationAlgorithmChannelToAclNormType(descriptor.m_NormChannelType); - return arm_compute::NormalizationLayerInfo(normType, - descriptor.m_NormSize, - descriptor.m_Alpha, - descriptor.m_Beta, - descriptor.m_K, - false); -} - -arm_compute::PermutationVector BuildArmComputePermutationVector(const armnn::PermutationVector& perm) -{ - arm_compute::PermutationVector aclPerm; - - unsigned int start = 0; - while ((start < perm.GetSize()) && (start == perm[start])) - { - ++start; - } - - for (unsigned int i = start; i < perm.GetSize(); ++i) - { - aclPerm.set(i - start, perm[i] - start); - } - - return aclPerm; -} - -} // namespace armcomputetensorutils -} // namespace armnn diff --git a/src/armnn/backends/ArmComputeTensorUtils.hpp b/src/armnn/backends/ArmComputeTensorUtils.hpp deleted file mode 100644 index 572e310ecf..0000000000 --- a/src/armnn/backends/ArmComputeTensorUtils.hpp +++ /dev/null @@ -1,199 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include <armnn/Tensor.hpp> -#include <armnn/DescriptorsFwd.hpp> - -#include <arm_compute/core/ITensor.h> -#include <arm_compute/core/TensorInfo.h> -#include <arm_compute/core/Types.h> - -#include <boost/cast.hpp> - -namespace armnn -{ -class ITensorHandle; - -namespace armcomputetensorutils -{ - -/// Utility function to map an armnn::DataType to corresponding arm_compute::DataType. -arm_compute::DataType GetArmComputeDataType(armnn::DataType dataType); - -/// Utility function used to setup an arm_compute::TensorShape object from an armnn::TensorShape. -arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape); - -/// Utility function used to setup an arm_compute::ITensorInfo object whose dimensions are based on the given -/// armnn::ITensorInfo. -arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo); - -/// Utility function used to setup an arm_compute::PoolingLayerInfo object from an armnn::Pooling2dDescriptor. -arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor); - -/// Utility function to setup an arm_compute::NormalizationLayerInfo object from an armnn::NormalizationDescriptor. -arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const NormalizationDescriptor& desc); - -/// Utility function used to setup an arm_compute::PermutationVector object from an armnn::PermutationVector. 
-arm_compute::PermutationVector BuildArmComputePermutationVector(const armnn::PermutationVector& vector); - -/// Utility function used to setup an arm_compute::PadStrideInfo object from an armnn layer descriptor. -template <typename Descriptor> -arm_compute::PadStrideInfo BuildArmComputePadStrideInfo(const Descriptor &descriptor) -{ - return arm_compute::PadStrideInfo(descriptor.m_StrideX, - descriptor.m_StrideY, - descriptor.m_PadLeft, - descriptor.m_PadRight, - descriptor.m_PadTop, - descriptor.m_PadBottom, - arm_compute::DimensionRoundingType::FLOOR); -} - -/// Sets up the given ArmCompute tensor's dimensions based on the given ArmNN tensor. -template <typename Tensor> -void BuildArmComputeTensor(Tensor& tensor, const armnn::TensorInfo& tensorInfo) -{ - tensor.allocator()->init(BuildArmComputeTensorInfo(tensorInfo)); -} - -template <typename Tensor> -void InitialiseArmComputeTensorEmpty(Tensor& tensor) -{ - tensor.allocator()->allocate(); -} - -/// Utility function to free unused tensors after a workload is configured and prepared -template <typename Tensor> -void FreeTensorIfUnused(std::unique_ptr<Tensor>& tensor) -{ - if (tensor && !tensor->is_used()) - { - tensor.reset(nullptr); - } -} - -// Helper function to obtain byte offset into tensor data -inline size_t GetTensorOffset(const arm_compute::ITensorInfo& info, - uint32_t batchIndex, - uint32_t channelIndex, - uint32_t y, - uint32_t x) -{ - arm_compute::Coordinates coords; - coords.set(3, static_cast<int>(batchIndex)); - coords.set(2, static_cast<int>(channelIndex)); - coords.set(1, static_cast<int>(y)); - coords.set(0, static_cast<int>(x)); - return info.offset_element_in_bytes(coords); -} - -// Helper function to obtain element offset into data buffer representing tensor data (assuming no strides). -inline size_t GetLinearBufferOffset(const arm_compute::ITensorInfo& info, - uint32_t batchIndex, - uint32_t channelIndex, - uint32_t y, - uint32_t x) -{ - const arm_compute::TensorShape& shape = info.tensor_shape(); - uint32_t width = static_cast<uint32_t>(shape[0]); - uint32_t height = static_cast<uint32_t>(shape[1]); - uint32_t numChannels = static_cast<uint32_t>(shape[2]); - return ((batchIndex * numChannels + channelIndex) * height + y) * width + x; -} - -template <typename T> -void CopyArmComputeITensorData(const arm_compute::ITensor& srcTensor, T* dstData) -{ - // If MaxNumOfTensorDimensions is increased, this loop will need fixing. - static_assert(MaxNumOfTensorDimensions == 4, "Please update CopyArmComputeITensorData"); - { - const arm_compute::ITensorInfo& info = *srcTensor.info(); - const arm_compute::TensorShape& shape = info.tensor_shape(); - const uint8_t* const bufferPtr = srcTensor.buffer(); - uint32_t width = static_cast<uint32_t>(shape[0]); - uint32_t height = static_cast<uint32_t>(shape[1]); - uint32_t numChannels = static_cast<uint32_t>(shape[2]); - uint32_t numBatches = static_cast<uint32_t>(shape[3]); - - for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex) - { - for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex) - { - for (unsigned int y = 0; y < height; ++y) - { - // Copies one row from arm_compute tensor buffer to linear memory buffer. - // A row is the largest contiguous region we can copy, as the tensor data may be using strides. 
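For reference, the element arithmetic behind the deleted GetLinearBufferOffset above, sketched as a standalone helper under the same dense (unstrided) NCHW assumption; the name and signature are hypothetical, not taken from the deleted header:

    #include <cassert>
    #include <cstdint>

    // Element offset into a densely packed NCHW buffer, assuming no padding
    // or strides: ((n * C + c) * H + y) * W + x.
    inline std::size_t LinearOffset(std::uint32_t numChannels, std::uint32_t height, std::uint32_t width,
                                    std::uint32_t batch, std::uint32_t channel, std::uint32_t y, std::uint32_t x)
    {
        return ((static_cast<std::size_t>(batch) * numChannels + channel) * height + y) * width + x;
    }

    int main()
    {
        // For a 2x3x4x5 (N, C, H, W) tensor, element (n=1, c=2, y=3, x=4) is the last one.
        assert(LinearOffset(3, 4, 5, 1, 2, 3, 4) == 2 * 3 * 4 * 5 - 1);
        return 0;
    }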
- memcpy(dstData + GetLinearBufferOffset(info, batchIndex, channelIndex, y, 0), - bufferPtr + GetTensorOffset(info, batchIndex, channelIndex, y, 0), - width * sizeof(T)); - } - } - } - } -} - -template <typename T> -void CopyArmComputeITensorData(const T* srcData, arm_compute::ITensor& dstTensor) -{ - // If MaxNumOfTensorDimensions is increased, this loop will need fixing. - static_assert(MaxNumOfTensorDimensions == 4, "Please update CopyArmComputeITensorData"); - { - const arm_compute::ITensorInfo& info = *dstTensor.info(); - const arm_compute::TensorShape& shape = info.tensor_shape(); - uint8_t* const bufferPtr = dstTensor.buffer(); - uint32_t width = static_cast<uint32_t>(shape[0]); - uint32_t height = static_cast<uint32_t>(shape[1]); - uint32_t numChannels = static_cast<uint32_t>(shape[2]); - uint32_t numBatches = static_cast<uint32_t>(shape[3]); - - for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex) - { - for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex) - { - for (unsigned int y = 0; y < height; ++y) - { - // Copies one row from linear memory buffer to arm_compute tensor buffer. - // A row is the largest contiguous region we can copy, as the tensor data may be using strides. - memcpy(bufferPtr + GetTensorOffset(info, batchIndex, channelIndex, y, 0), - srcData + GetLinearBufferOffset(info, batchIndex, channelIndex, y, 0), - width * sizeof(T)); - } - } - } - } -} - -/// Construct a TensorShape object from an ArmCompute object based on arm_compute::Dimensions. -/// \tparam ArmComputeType Any type that implements the Dimensions interface -/// \tparam T Shape value type -/// \param shapelike An ArmCompute object that implements the Dimensions interface -/// \param initial A default value to initialise the shape with -/// \return A TensorShape object filled from the Acl shapelike object. -template<typename ArmComputeType, typename T> -TensorShape GetTensorShape(const ArmComputeType& shapelike, T initial) -{ - std::vector<unsigned int> s(MaxNumOfTensorDimensions, initial); - for (unsigned int i=0; i < shapelike.num_dimensions(); ++i) - { - s[(shapelike.num_dimensions()-1)-i] = boost::numeric_cast<unsigned int>(shapelike[i]); - } - return TensorShape(boost::numeric_cast<unsigned int>(shapelike.num_dimensions()), s.data()); -}; - -/// Get the strides from an ACL strides object -inline TensorShape GetStrides(const arm_compute::Strides& strides) -{ - return GetTensorShape(strides, 0U); -} - -/// Get the shape from an ACL shape object -inline TensorShape GetShape(const arm_compute::TensorShape& shape) -{ - return GetTensorShape(shape, 1U); -} - -} // namespace armcomputetensorutils -} // namespace armnn diff --git a/src/armnn/backends/ArmComputeUtils.hpp b/src/armnn/backends/ArmComputeUtils.hpp deleted file mode 100644 index db472964ea..0000000000 --- a/src/armnn/backends/ArmComputeUtils.hpp +++ /dev/null @@ -1,125 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#if ARMCOMPUTENEON_ENABLED || ARMCOMPUTECL_ENABLED - -#include <armnn/Tensor.hpp> -#include <armnn/Descriptors.hpp> - -#include <arm_compute/core/Types.h> - -namespace armnn -{ - -inline arm_compute::NormalizationLayerInfo -CreateAclNormalizationLayerInfoForL2Normalization(const armnn::TensorInfo& tensorInfo) -{ - const unsigned int depth = tensorInfo.GetShape()[1]; - - // At the time of writing, {CL|Neon}L2Normalization performs the reduction only along dimension 0. 
This version of - // L2 Normalization always performs the reduction along the depth axis, though. Thus, we repurpose - // {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by carefully chosing the normalization - // parameters. - // - // Please refer to both the reference implementation of the normalization layer and the implementation of - // {CL|Neon}NormalizationLayer when checking the derivations for the parameter values below. - - // Make sure normalization covers the entire depth range. ACL requires the normalization size to be odd. - // CL: This does not result in extra kernel threads not doing any work: See usage of the RADIUS parameter in - // ACL's normalization_layer_cross_map() CL function. - const uint32_t normSize = depth * 2u + 1u; - - // See ACL's NormalizationLayerInfo::scale_coeff() definition. - // For the reference implementation, to make alpha_ become 1, we'd have to use alpha = normSize instead. - const float alpha = 1.0f; - - // Don't offset the reduction. - const float kappa = 0.0f; - - // pow(reduction, -0.5) = 1 / sqrt(reduction) - const float beta = 0.5f; - - return arm_compute::NormalizationLayerInfo(arm_compute::NormType::CROSS_MAP, normSize, alpha, beta, kappa, false); -} - -inline arm_compute::ActivationLayerInfo::ActivationFunction -ConvertActivationFunctionToAclActivationFunction(ActivationFunction armnnFunction) -{ - using AclActivationFunction = arm_compute::ActivationLayerInfo::ActivationFunction; - - switch (armnnFunction) - { - case ActivationFunction::Linear: return AclActivationFunction::LINEAR; - // Arm compute's 'logistic' function is non-parameterized, so it is exactly a sigmoid function. - case ActivationFunction::Sigmoid: return AclActivationFunction::LOGISTIC; - case ActivationFunction::ReLu: return AclActivationFunction::RELU; - case ActivationFunction::BoundedReLu: return AclActivationFunction::LU_BOUNDED_RELU; - case ActivationFunction::SoftReLu: return AclActivationFunction::SOFT_RELU; - case ActivationFunction::LeakyReLu: return AclActivationFunction::LEAKY_RELU; - case ActivationFunction::Abs: return AclActivationFunction::ABS; - case ActivationFunction::Sqrt: return AclActivationFunction::SQRT; - case ActivationFunction::Square: return AclActivationFunction::SQUARE; - case ActivationFunction::TanH: return AclActivationFunction::TANH; - default: throw InvalidArgumentException("Unsupported activation function"); - } -} - -inline arm_compute::ActivationLayerInfo -ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor& actDesc) -{ - return arm_compute::ActivationLayerInfo(ConvertActivationFunctionToAclActivationFunction(actDesc.m_Function), - actDesc.m_A, actDesc.m_B); -} - -inline arm_compute::PoolingType ConvertPoolingAlgorithmToAclPoolingType(PoolingAlgorithm poolingAlgorithm) -{ - using arm_compute::PoolingType; - - switch (poolingAlgorithm) - { - case PoolingAlgorithm::Max: return PoolingType::MAX; - case PoolingAlgorithm::Average: return PoolingType::AVG; - case PoolingAlgorithm::L2: return PoolingType::L2; - default: throw InvalidArgumentException("Unsupported pooling algorithm"); - } -} - -inline arm_compute::DimensionRoundingType ConvertOutputShapeRoundingToAclDimensionRoundingType(OutputShapeRounding - rounding) -{ - using arm_compute::DimensionRoundingType; - - switch (rounding) - { - case OutputShapeRounding::Ceiling: return DimensionRoundingType::CEIL; - case OutputShapeRounding::Floor: return DimensionRoundingType::FLOOR; - default: throw InvalidArgumentException("Unsupported Output 
Shape Rounding type"); - } -} - -inline arm_compute::NormType -ConvertNormalizationAlgorithmChannelToAclNormType(NormalizationAlgorithmChannel channelType) -{ - using arm_compute::NormType; - switch (channelType) - { - case NormalizationAlgorithmChannel::Across: return NormType::CROSS_MAP; - case NormalizationAlgorithmChannel::Within: return NormType::IN_MAP_2D; - default: throw InvalidArgumentException("Unsupported normalization algorithm channel type"); - } -} - -inline arm_compute::FullyConnectedLayerInfo -ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(const FullyConnectedDescriptor& fullyConnectedDesc) -{ - arm_compute::FullyConnectedLayerInfo fc_info; - fc_info.transpose_weights = fullyConnectedDesc.m_TransposeWeightMatrix; - return fc_info; -} - -} - -#endif // ARMCOMPUTENEON_ENABLED || ARMCOMPUTECL_ENABLED diff --git a/src/armnn/backends/ClContextControl.cpp b/src/armnn/backends/ClContextControl.cpp deleted file mode 100644 index e8b21c942d..0000000000 --- a/src/armnn/backends/ClContextControl.cpp +++ /dev/null @@ -1,235 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClContextControl.hpp" - -#include "armnn/Exceptions.hpp" - -#ifdef ARMCOMPUTECL_ENABLED -#include <arm_compute/core/CL/CLKernelLibrary.h> -#include <arm_compute/runtime/CL/CLScheduler.h> -#endif - -#include <boost/assert.hpp> -#include <boost/format.hpp> -#include <boost/log/trivial.hpp> -#include <boost/polymorphic_cast.hpp> -#include <boost/core/ignore_unused.hpp> - -#include "LeakChecking.hpp" - -namespace cl -{ -class Context; -class CommandQueue; -class Device; -} - -namespace armnn -{ - -ClContextControl::ClContextControl(IGpuAccTunedParameters* clTunedParameters, - bool profilingEnabled) - : m_clTunedParameters(boost::polymorphic_downcast<ClTunedParameters*>(clTunedParameters)) - , m_ProfilingEnabled(profilingEnabled) -{ - // Ignore m_ProfilingEnabled if unused to avoid compiling problems when ArmCompute is disabled. - boost::ignore_unused(m_ProfilingEnabled); - -#ifdef ARMCOMPUTECL_ENABLED - try - { - std::vector<cl::Platform> platforms; - cl::Platform::get(&platforms); - - // Selects default platform for the first element. - cl::Platform::setDefault(platforms[0]); - - std::vector<cl::Device> devices; - platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices); - - // Selects default device for the first element. - cl::Device::setDefault(devices[0]); - } - catch (const cl::Error& clError) - { - throw ClRuntimeUnavailableException(boost::str(boost::format( - "Could not initialize the CL runtime. Error description: %1%. CL error code: %2%" - ) % clError.what() % clError.err())); - } - - // Removes the use of global CL context. - cl::Context::setDefault(cl::Context{}); - BOOST_ASSERT(cl::Context::getDefault()() == NULL); - - // Removes the use of global CL command queue. - cl::CommandQueue::setDefault(cl::CommandQueue{}); - BOOST_ASSERT(cl::CommandQueue::getDefault()() == NULL); - - // Always load the OpenCL runtime. - LoadOpenClRuntime(); -#endif -} - -ClContextControl::~ClContextControl() -{ -#ifdef ARMCOMPUTECL_ENABLED - // Load the OpencCL runtime without the tuned parameters to free the memory for them. - try - { - UnloadOpenClRuntime(); - } - catch (const cl::Error& clError) - { - // This should not happen, it is ignored if it does. - - // Coverity fix: BOOST_LOG_TRIVIAL (previously used here to report the error) may throw an - // exception of type std::length_error. 
- // Using stderr instead in this context as there is no point in nesting try-catch blocks here. - std::cerr << "A CL error occurred unloading the runtime tuner parameters: " - << clError.what() << ". CL error code is: " << clError.err() << std::endl; - } -#endif -} - -void ClContextControl::LoadOpenClRuntime() -{ - DoLoadOpenClRuntime(true); -} - -void ClContextControl::UnloadOpenClRuntime() -{ - DoLoadOpenClRuntime(false); -} - -void ClContextControl::DoLoadOpenClRuntime(bool useTunedParameters) -{ -#ifdef ARMCOMPUTECL_ENABLED - cl::Device device = cl::Device::getDefault(); - cl::Context context; - cl::CommandQueue commandQueue; - - if (arm_compute::CLScheduler::get().context()() != NULL) - { - // Wait for all queued CL requests to finish before reinitialising it. - arm_compute::CLScheduler::get().sync(); - } - - try - { - arm_compute::CLKernelLibrary::get().clear_programs_cache(); - // Initialise the scheduler with a dummy context to release the LLVM data (which only happens when there are no - // context references); it is initialised again, with a proper context, later. - arm_compute::CLScheduler::get().init(context, commandQueue, device); - arm_compute::CLKernelLibrary::get().init(".", context, device); - - { - // - // Here we replace the context with a new one in which - // the memory leak checks show it as an extra allocation but - // because of the scope of the leak checks, it doesn't count - // the disposal of the original object. On the other hand it - // does count the creation of this context which it flags - // as a memory leak. By adding the following line we prevent - // this to happen. - // - ARMNN_DISABLE_LEAK_CHECKING_IN_SCOPE(); - context = cl::Context(device); - } - - // NOTE: In this specific case profiling has to be enabled on the command queue - // in order for the CLTuner to work. - bool profilingNeededForClTuner = useTunedParameters && m_clTunedParameters && - m_clTunedParameters->m_Mode == IGpuAccTunedParameters::Mode::UpdateTunedParameters; - - if (m_ProfilingEnabled || profilingNeededForClTuner) - { - // Create a new queue with profiling enabled. - commandQueue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE); - } - else - { - // Use default queue. - commandQueue = cl::CommandQueue(context, device); - } - } - catch (const cl::Error& clError) - { - throw ClRuntimeUnavailableException(boost::str(boost::format( - "Could not initialize the CL runtime. Error description: %1%. CL error code: %2%" - ) % clError.what() % clError.err())); - } - - // Note the first argument (path to cl source code) will be ignored as they should be embedded in the armcompute. 
- arm_compute::CLKernelLibrary::get().init(".", context, device); - - arm_compute::ICLTuner* tuner = nullptr; - if (useTunedParameters && m_clTunedParameters) - { - tuner = &m_clTunedParameters->m_Tuner; - } - arm_compute::CLScheduler::get().init(context, commandQueue, device, tuner); -#endif -} - -void ClContextControl::ClearClCache() -{ - DoLoadOpenClRuntime(true); -} - -armnn::IGpuAccTunedParameters* IGpuAccTunedParameters::CreateRaw(armnn::IGpuAccTunedParameters::Mode mode) -{ - return new ClTunedParameters(mode); -} - -armnn::IGpuAccTunedParametersPtr IGpuAccTunedParameters::Create(armnn::IGpuAccTunedParameters::Mode mode) -{ - return IGpuAccTunedParametersPtr(CreateRaw(mode), &IGpuAccTunedParameters::Destroy); -} - -void IGpuAccTunedParameters::Destroy(IGpuAccTunedParameters* params) -{ - delete params; -} - -ClTunedParameters::ClTunedParameters(armnn::IGpuAccTunedParameters::Mode mode) - : m_Mode(mode) -#ifdef ARMCOMPUTECL_ENABLED - , m_Tuner(mode == ClTunedParameters::Mode::UpdateTunedParameters) -#endif -{ -} - -void ClTunedParameters::Load(const char* filename) -{ -#ifdef ARMCOMPUTECL_ENABLED - try - { - m_Tuner.load_from_file(filename); - } - catch (const std::exception& e) - { - throw armnn::Exception(std::string("Failed to load tuned parameters file '") + filename + "': " + - e.what()); - } -#endif -} - -void ClTunedParameters::Save(const char* filename) const -{ -#ifdef ARMCOMPUTECL_ENABLED - try - { - m_Tuner.save_to_file(filename); - } - catch (const std::exception& e) - { - throw armnn::Exception(std::string("Failed to save tuned parameters file to '") + filename + "': " + - e.what()); - } -#endif -} - -} // namespace armnn diff --git a/src/armnn/backends/ClContextControl.hpp b/src/armnn/backends/ClContextControl.hpp deleted file mode 100644 index 5ac56423bd..0000000000 --- a/src/armnn/backends/ClContextControl.hpp +++ /dev/null @@ -1,62 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include "armnn/IRuntime.hpp" - -#ifdef ARMCOMPUTECL_ENABLED -#include <arm_compute/runtime/CL/CLTuner.h> -#endif - -namespace armnn -{ - -class IGpuAccTunedParameters; -class ClTunedParameters; - -// ARM Compute OpenCL context control. -class ClContextControl -{ -public: - - ClContextControl(IGpuAccTunedParameters* clTunedParameters = nullptr, - bool profilingEnabled = false); - - virtual ~ClContextControl(); - - void LoadOpenClRuntime(); - - // Users should call this (after freeing all of the cl::Context objects they use) - // to release the cached memory used by the compute library. - void UnloadOpenClRuntime(); - - // Clear the CL cache, without losing the tuned parameter settings. - void ClearClCache(); - -private: - - void DoLoadOpenClRuntime(bool useTunedParameters); - - ClTunedParameters* m_clTunedParameters; - - bool m_ProfilingEnabled; -}; - -class ClTunedParameters : public IGpuAccTunedParameters -{ -public: - ClTunedParameters(armnn::IGpuAccTunedParameters::Mode mode); - - virtual void Load(const char* filename); - virtual void Save(const char* filename) const; - - Mode m_Mode; - -#ifdef ARMCOMPUTECL_ENABLED - arm_compute::CLTuner m_Tuner; -#endif -}; - -} // namespace armnn diff --git a/src/armnn/backends/ClLayerSupport.cpp b/src/armnn/backends/ClLayerSupport.cpp deleted file mode 100644 index 30a1330706..0000000000 --- a/src/armnn/backends/ClLayerSupport.cpp +++ /dev/null @@ -1,468 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "LayerSupportCommon.hpp" - -#include "ClLayerSupport.hpp" -#include "InternalTypes.hpp" -#include <armnn/Descriptors.hpp> -#include <armnn/Types.hpp> -#include <armnn/Tensor.hpp> - -#include <boost/core/ignore_unused.hpp> - -#ifdef ARMCOMPUTECL_ENABLED -#include "ClWorkloads/ClAdditionWorkload.hpp" -#include "ClWorkloads/ClActivationFloatWorkload.hpp" -#include "ClWorkloads/ClBatchNormalizationFloatWorkload.hpp" -#include "ClWorkloads/ClConvertFp16ToFp32Workload.hpp" -#include "ClWorkloads/ClConvertFp32ToFp16Workload.hpp" -#include "ClWorkloads/ClConvolution2dBaseWorkload.hpp" -#include "ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp" -#include "ClWorkloads/ClDivisionFloatWorkload.hpp" -#include "ClWorkloads/ClL2NormalizationFloatWorkload.hpp" -#include "ClWorkloads/ClMultiplicationFloatWorkload.hpp" -#include "ClWorkloads/ClFullyConnectedWorkload.hpp" -#include "ClWorkloads/ClPooling2dBaseWorkload.hpp" -#include "ClWorkloads/ClPermuteWorkload.hpp" -#include "ClWorkloads/ClNormalizationFloatWorkload.hpp" -#include "ClWorkloads/ClSoftmaxBaseWorkload.hpp" -#include "ClWorkloads/ClSubtractionWorkload.hpp" -#include "ClWorkloads/ClLstmFloatWorkload.hpp" -#endif - -using namespace boost; - -namespace armnn -{ -namespace -{ -template<unsigned int FilterSize> -bool IsMatchingSize2d(const TensorInfo& weightInfo) -{ - // Width & Height must match. - return (weightInfo.GetShape()[3] == FilterSize) && (weightInfo.GetShape()[2] == FilterSize); -} - -template<uint32_t ValidStride> -bool IsMatchingStride(uint32_t actualStride) -{ - return ValidStride == actualStride; -} - -template<uint32_t FirstStride, uint32_t SecondStride, uint32_t... ValidStrides> -bool IsMatchingStride(uint32_t actualStride) -{ - return IsMatchingStride<FirstStride>(actualStride) || IsMatchingStride<SecondStride, ValidStrides...>(actualStride); -}; - -bool IsClBackendSupported(std::string* reasonIfUnsupported) -{ -#if ARMCOMPUTECL_ENABLED - return true; -#else - if (reasonIfUnsupported != nullptr) - { - *reasonIfUnsupported = "The armnn library has been built without CL support"; - } - return false; -#endif -} - -#if ARMCOMPUTECL_ENABLED -#define FORWARD_CL_LAYER_SUPPORT_FUNC(expr) (expr) -#else -#define FORWARD_CL_LAYER_SUPPORT_FUNC(expr) IsClBackendSupported(reasonIfUnsupported) -#endif - -#if ARMCOMPUTECL_ENABLED -template<class FuncType, class... Args> -inline bool IsWorkloadSupported(FuncType&& func, std::string* reasonIfUnsupported, Args&&... args) -{ - arm_compute::Status aclStatus = func(std::forward<Args>(args)...); - const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); - if (!supported && reasonIfUnsupported) - { - *reasonIfUnsupported = aclStatus.error_description(); - } - return supported; -} - -#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \ - return IsWorkloadSupported(func, reasonIfUnsupported, __VA_ARGS__); -#else -#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \ - return IsClBackendSupported(reasonIfUnsupported); -#endif - -} //namespace - -template<typename FloatFunc, typename Uint8Func, typename ... Params> -bool IsSupportedForDataTypeCl(std::string* reasonIfUnsupported, - DataType dataType, - FloatFunc floatFuncPtr, - Uint8Func uint8FuncPtr, - Params&&... 
params) -{ - return IsClBackendSupported(reasonIfUnsupported) && - IsSupportedForDataTypeGeneric(reasonIfUnsupported, - dataType, - floatFuncPtr, - floatFuncPtr, - uint8FuncPtr, - std::forward<Params>(params)...); -} - -bool IsActivationSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const ActivationDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClActivationWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor); -} - -bool IsAdditionSupportedCl(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - return FORWARD_CL_LAYER_SUPPORT_FUNC(ClAdditionValidate(input0, - input1, - output, - reasonIfUnsupported)); -} - -bool IsBatchNormalizationSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& mean, - const TensorInfo& var, - const TensorInfo& beta, - const TensorInfo& gamma, - const BatchNormalizationDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClBatchNormalizationValidate, - reasonIfUnsupported, - input, - output, - mean, - var, - beta, - gamma, - descriptor); -} - -bool IsConstantSupportedCl(const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - return IsSupportedForDataTypeCl(reasonIfUnsupported, - output.GetDataType(), - &TrueFunc<>, - &FalseFuncU8<>); -} - -bool IsClDirectConvolution2dSupported(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc) -{ - bool isSupported = false; - - bool strideXIsOneOrTwo = IsMatchingStride<1, 2>(desc.m_StrideX); - bool strideXIsThree = IsMatchingStride<3>(desc.m_StrideX); - - bool strideYIsOneOrTwo = IsMatchingStride<1, 2>(desc.m_StrideY); - bool strideYIsThree = IsMatchingStride<3>(desc.m_StrideY); - - bool strideIsOneOrTwo = strideXIsOneOrTwo && strideYIsOneOrTwo; - bool strideIsOneOrTwoOrThree = ( strideXIsOneOrTwo || strideXIsThree ) && ( strideYIsOneOrTwo || strideYIsThree ); - - // 1x1 convolution with strides of 1,2,3. - isSupported |= IsMatchingSize2d<1>(weightInfo) && ( strideIsOneOrTwoOrThree ); - - // 3x3 convolution with strides of 1,2. - isSupported |= IsMatchingSize2d<3>(weightInfo) && ( strideIsOneOrTwo ); - - // 5x5 convolution with strides of 1,2 - isSupported |= IsMatchingSize2d<5>(weightInfo) && ( strideIsOneOrTwo ); - - //Fall back to normal convolution for the asymmetric padding case. - if (desc.m_PadLeft != desc.m_PadRight || - desc.m_PadTop != desc.m_PadBottom) - { - //Direct convolution does not support asymmetric padding yet. 
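For reference, the kernel-size and stride rules that the deleted IsClDirectConvolution2dSupported encodes can be sketched as a standalone predicate; the signature below is hypothetical and simplified, not taken from the deleted file:

    #include <cassert>
    #include <cstdint>

    // Direct convolution is used only for 1x1 kernels with strides 1-3 and for
    // 3x3 or 5x5 kernels with strides 1-2, and only with symmetric padding
    // (padLeft == padRight and padTop == padBottom); otherwise the generic
    // convolution path is taken.
    bool DirectConv2dSupportedSketch(std::uint32_t kernelW, std::uint32_t kernelH,
                                     std::uint32_t strideX, std::uint32_t strideY,
                                     bool symmetricPadding)
    {
        const bool strideIn12  = (strideX == 1 || strideX == 2) && (strideY == 1 || strideY == 2);
        const bool strideIn123 = (strideX >= 1 && strideX <= 3) && (strideY >= 1 && strideY <= 3);

        bool supported = false;
        supported |= (kernelW == 1 && kernelH == 1) && strideIn123; // 1x1, strides 1-3
        supported |= (kernelW == 3 && kernelH == 3) && strideIn12;  // 3x3, strides 1-2
        supported |= (kernelW == 5 && kernelH == 5) && strideIn12;  // 5x5, strides 1-2

        return supported && symmetricPadding;
    }

    int main()
    {
        assert(DirectConv2dSupportedSketch(3, 3, 1, 1, true));   // 3x3, stride 1
        assert(!DirectConv2dSupportedSketch(3, 3, 3, 3, true));  // stride 3 only allowed for 1x1
        assert(!DirectConv2dSupportedSketch(1, 1, 1, 1, false)); // asymmetric padding falls back
        return 0;
    }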
- isSupported = false; - } - - return isSupported; -} - -bool IsDirectConvolution2dParamsSupportedCl(std::string* reasonIfUnsupported, - const Convolution2dDescriptor& parameters, - const TensorInfo& weightInfo) -{ - return IsClDirectConvolution2dSupported(weightInfo, parameters); -} - -bool IsConvolution2dSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional<TensorInfo>& biases, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClConvolution2dWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor, - weights, - biases); -} - -bool IsDepthwiseConvolutionSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional<TensorInfo>& biases, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClDepthwiseConvolutionWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor, - weights, - biases); -} - -bool IsDivisionSupportedCl(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClDivisionWorkloadValidate, - reasonIfUnsupported, - input0, - input1, - output); -} - -bool IsSubtractionSupportedCl(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - return FORWARD_CL_LAYER_SUPPORT_FUNC(ClSubtractionValidate(input0, - input1, - output, - reasonIfUnsupported)); -} - -bool IsFullyConnectedSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& weights, - const TensorInfo& biases, - const FullyConnectedDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClFullyConnectedWorkloadValidate, - reasonIfUnsupported, - input, - output, - weights, - biases, - descriptor); -} - -bool IsInputSupportedCl(const TensorInfo& input, - std::string* reasonIfUnsupported) -{ - return IsSupportedForDataTypeCl(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsL2NormalizationSupportedCl(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClL2NormalizationWorkloadValidate, reasonIfUnsupported, input, output); -} - -bool IsMergerSupportedCl(const std::vector<const TensorInfo*> inputs, - const OriginsDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(descriptor); - return IsSupportedForDataTypeCl(reasonIfUnsupported, - inputs[0]->GetDataType(), - &TrueFunc<>, - &FalseFuncU8<>); -} - -bool IsMultiplicationSupportedCl(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClMultiplicationWorkloadValidate, - reasonIfUnsupported, - input0, - input1, - output); -} - -bool IsNormalizationSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const NormalizationDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClNormalizationWorkloadValidate, reasonIfUnsupported, input, output, descriptor); -} - -bool IsOutputSupportedCl(const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - return IsSupportedForDataTypeCl(reasonIfUnsupported, - output.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool 
IsPermuteSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const PermuteDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(input); - ignore_unused(output); - FORWARD_WORKLOAD_VALIDATE_FUNC(ClPermuteWorkloadValidate, reasonIfUnsupported, descriptor); -} - -bool IsPooling2dSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const Pooling2dDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClPooling2dWorkloadValidate, reasonIfUnsupported, input, output, descriptor); -} - -bool IsResizeBilinearSupportedCl(const TensorInfo& input, - std::string* reasonIfUnsupported) -{ - return IsSupportedForDataTypeCl(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &FalseFuncU8<>); -} - -bool IsSoftmaxSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const SoftmaxDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(descriptor); - FORWARD_WORKLOAD_VALIDATE_FUNC(ClSoftmaxWorkloadValidate, reasonIfUnsupported, input, output); -} - -bool IsSplitterSupportedCl(const TensorInfo& input, - const ViewsDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(descriptor); - return IsSupportedForDataTypeCl(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsFakeQuantizationSupportedCl(const TensorInfo& input, - const FakeQuantizationDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(input); - ignore_unused(descriptor); - return false; -} - -bool IsReshapeSupportedCl(const TensorInfo& input, - std::string* reasonIfUnsupported) -{ - ignore_unused(input); - return true; -} - -bool IsFloorSupportedCl(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - ignore_unused(output); - return IsClBackendSupported(reasonIfUnsupported) && - IsSupportedForDataTypeGeneric(reasonIfUnsupported, - input.GetDataType(), - &FalseFuncF16<>, - &TrueFunc<>, - &FalseFuncU8<>); -} - -bool IsLstmSupportedCl(const TensorInfo& input, const TensorInfo& outputStateIn, - const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, - const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, - const TensorInfo& output, const LstmDescriptor& descriptor, - const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, - const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights, - const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, - const TensorInfo& forgetGateBias, const TensorInfo& cellBias, - const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, - const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, - const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, - const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, - const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClLstmFloatWorkloadValidate, reasonIfUnsupported, - input, outputStateIn, cellStateIn, scratchBuffer, outputStateOut, cellStateOut, - output, descriptor, inputToForgetWeights, inputToCellWeights, - inputToOutputWeights, recurrentToForgetWeights, - recurrentToCellWeights, recurrentToOutputWeights, - forgetGateBias, cellBias, outputGateBias, - inputToInputWeights, recurrentToInputWeights, - cellToInputWeights, inputGateBias, projectionWeights, - projectionBias, cellToForgetWeights, 
cellToOutputWeights); -} - -bool IsConvertFp16ToFp32SupportedCl(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClConvertFp16ToFp32WorkloadValidate, - reasonIfUnsupported, - input, - output, - reasonIfUnsupported); -} - -bool IsConvertFp32ToFp16SupportedCl(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClConvertFp32ToFp16WorkloadValidate, - reasonIfUnsupported, - input, - output, - reasonIfUnsupported); -} - -bool IsMeanSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const MeanDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - return false; -} - -} diff --git a/src/armnn/backends/ClLayerSupport.hpp b/src/armnn/backends/ClLayerSupport.hpp deleted file mode 100644 index f5c1226e56..0000000000 --- a/src/armnn/backends/ClLayerSupport.hpp +++ /dev/null @@ -1,158 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include <armnn/DescriptorsFwd.hpp> -#include <armnn/Types.hpp> -#include <armnn/Tensor.hpp> -#include <armnn/ArmNN.hpp> - -#include <boost/optional.hpp> - -namespace armnn -{ -bool IsClDirectConvolution2dSupported(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc); -bool IsClDepthwiseConvolution2dDescParamsSupported(std::string* reasonIfUnsupported, - const DepthwiseConvolution2dDescriptor& parameters, - const TensorInfo& weights); - -bool IsActivationSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const ActivationDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsAdditionSupportedCl(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsBatchNormalizationSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& mean, - const TensorInfo& var, - const TensorInfo& beta, - const TensorInfo& gamma, - const BatchNormalizationDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsConstantSupportedCl(const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsConvolution2dSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional<TensorInfo>& biases, - std::string* reasonIfUnsupported = nullptr); - -bool IsDepthwiseConvolutionSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional<TensorInfo>& biases, - std::string* reasonIfUnsupported = nullptr); - -bool IsDivisionSupportedCl(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsSubtractionSupportedCl(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsFullyConnectedSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& weights, - const TensorInfo& biases, - const FullyConnectedDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsInputSupportedCl(const TensorInfo& input, - std::string* reasonIfUnsupported = nullptr); - -bool IsL2NormalizationSupportedCl(const TensorInfo& input, - const TensorInfo& output, - std::string* 
reasonIfUnsupported = nullptr); - -bool IsLstmSupportedCl(const TensorInfo& input, const TensorInfo& outputStateIn, - const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, - const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, - const TensorInfo& output, const LstmDescriptor& descriptor, - const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, - const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights, - const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, - const TensorInfo& forgetGateBias, const TensorInfo& cellBias, - const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, - const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, - const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, - const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, - const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported = nullptr); - -bool IsMergerSupportedCl(const std::vector<const TensorInfo*> inputs, - const OriginsDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsMultiplicationSupportedCl(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsNormalizationSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const NormalizationDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsOutputSupportedCl(const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsPermuteSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const PermuteDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsPooling2dSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const Pooling2dDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsResizeBilinearSupportedCl(const TensorInfo& input, - std::string* reasonIfUnsupported = nullptr); - -bool IsSoftmaxSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const SoftmaxDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsSplitterSupportedCl(const TensorInfo& input, - const ViewsDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsFakeQuantizationSupportedCl(const TensorInfo& input, - const FakeQuantizationDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsReshapeSupportedCl(const TensorInfo& input, - std::string* reasonIfUnsupported = nullptr); - -bool IsFloorSupportedCl(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsMeanSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const MeanDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsConvertFp16ToFp32SupportedCl(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsConvertFp32ToFp16SupportedCl(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -} diff --git a/src/armnn/backends/ClTensorHandle.hpp b/src/armnn/backends/ClTensorHandle.hpp deleted file mode 100644 index 9c78192284..0000000000 --- a/src/armnn/backends/ClTensorHandle.hpp +++ /dev/null @@ -1,136 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#pragma once - -#include "OutputHandler.hpp" -#include "ArmComputeTensorUtils.hpp" - -#include <arm_compute/runtime/CL/CLTensor.h> -#include <arm_compute/runtime/CL/CLSubTensor.h> -#include <arm_compute/runtime/CL/CLMemoryGroup.h> -#include <arm_compute/runtime/IMemoryGroup.h> -#include <arm_compute/core/TensorShape.h> -#include <arm_compute/core/Coordinates.h> - -#include <boost/polymorphic_pointer_cast.hpp> - -namespace armnn -{ - - -class IClTensorHandle : public ITensorHandle -{ -public: - virtual arm_compute::ICLTensor& GetTensor() = 0; - virtual arm_compute::ICLTensor const& GetTensor() const = 0; - virtual arm_compute::DataType GetDataType() const = 0; - virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) = 0; -}; - -class ClTensorHandle : public IClTensorHandle -{ -public: - ClTensorHandle(const TensorInfo& tensorInfo) - { - armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo); - } - - arm_compute::CLTensor& GetTensor() override { return m_Tensor; } - arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; } - virtual void Allocate() override {armnn::armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_Tensor);} - - virtual void Manage() override - { - assert(m_MemoryGroup != nullptr); - m_MemoryGroup->manage(&m_Tensor); - } - - virtual const void* Map(bool blocking = true) const override - { - const_cast<arm_compute::CLTensor*>(&m_Tensor)->map(blocking); - return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); - } - virtual void Unmap() const override { const_cast<arm_compute::CLTensor*>(&m_Tensor)->unmap(); } - - virtual ITensorHandle::Type GetType() const override { return ITensorHandle::CL; } - - virtual ITensorHandle* GetParent() const override { return nullptr; } - - virtual arm_compute::DataType GetDataType() const override - { - return m_Tensor.info()->data_type(); - } - - virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override - { - m_MemoryGroup = boost::polymorphic_pointer_downcast<arm_compute::CLMemoryGroup>(memoryGroup); - } - - TensorShape GetStrides() const override - { - return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); - } - - TensorShape GetShape() const override - { - return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); - } -private: - arm_compute::CLTensor m_Tensor; - std::shared_ptr<arm_compute::CLMemoryGroup> m_MemoryGroup; -}; - -class ClSubTensorHandle : public IClTensorHandle -{ -public: - ClSubTensorHandle(IClTensorHandle* parent, - const arm_compute::TensorShape& shape, - const arm_compute::Coordinates& coords) - : m_Tensor(&parent->GetTensor(), shape, coords) - { - parentHandle = parent; - } - - arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; } - arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; } - - virtual void Allocate() override {} - virtual void Manage() override {} - - virtual const void* Map(bool blocking = true) const override - { - const_cast<arm_compute::CLSubTensor*>(&m_Tensor)->map(blocking); - return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); - } - virtual void Unmap() const override { const_cast<arm_compute::CLSubTensor*>(&m_Tensor)->unmap(); } - - virtual ITensorHandle::Type GetType() const override { return ITensorHandle::CL; } - - virtual ITensorHandle* GetParent() const override { 
return parentHandle; } - - virtual arm_compute::DataType GetDataType() const override - { - return m_Tensor.info()->data_type(); - } - - virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>&) override {} - - TensorShape GetStrides() const override - { - return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); - } - - TensorShape GetShape() const override - { - return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); - } - -private: - mutable arm_compute::CLSubTensor m_Tensor; - ITensorHandle* parentHandle = nullptr; - -}; - -} diff --git a/src/armnn/backends/ClWorkloadFactory.cpp b/src/armnn/backends/ClWorkloadFactory.cpp deleted file mode 100644 index 591fb85dbb..0000000000 --- a/src/armnn/backends/ClWorkloadFactory.cpp +++ /dev/null @@ -1,490 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#include "ClWorkloadFactory.hpp" - -#include "armnn/Exceptions.hpp" -#include "armnn/Utils.hpp" - -#include <string> -#include "CpuTensorHandle.hpp" -#include "Layer.hpp" - -#ifdef ARMCOMPUTECL_ENABLED -#include <arm_compute/core/CL/CLKernelLibrary.h> -#include <arm_compute/runtime/CL/CLBufferAllocator.h> -#include <arm_compute/runtime/CL/CLScheduler.h> - -#include "ClWorkloads.hpp" - -#include "backends/MemCopyWorkload.hpp" -#include "backends/ClTensorHandle.hpp" - -#include "memory/IPoolManager.hpp" -#endif - -#include "MakeWorkloadHelper.hpp" - -#include <boost/polymorphic_cast.hpp> -#include <boost/format.hpp> -#include <boost/log/trivial.hpp> - -namespace armnn -{ - -bool ClWorkloadFactory::IsLayerSupported(const Layer& layer, - boost::optional<DataType> dataType, - std::string& outReasonIfUnsupported) -{ - return IWorkloadFactory::IsLayerSupported(Compute::GpuAcc, layer, dataType, outReasonIfUnsupported); -} - -#ifdef ARMCOMPUTECL_ENABLED - -ClWorkloadFactory::ClWorkloadFactory() -: m_MemoryManager(std::make_unique<arm_compute::CLBufferAllocator>()) -{ -} - -std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const -{ - std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo); - tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup()); - - return tensorHandle; -} - -std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, - TensorShape const& subTensorShape, - unsigned int const* subTensorOrigin) const -{ - BOOST_ASSERT(parent.GetType() == ITensorHandle::CL); - - arm_compute::Coordinates coords; - arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape); - - coords.set_num_dimensions(subTensorShape.GetNumDimensions()); - for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++) - { - // Arm compute indexes tensor coords in reverse order. 
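For reference, the coordinate flip performed when the deleted CreateSubTensorHandle builds a sub-tensor origin, sketched as a standalone helper (hypothetical name, not from the deleted file): ArmNN supplies the origin in (batch, channels, height, width) order, while ACL coordinates index dimensions in the opposite order.

    #include <cassert>
    #include <vector>

    // Write an ArmNN-order origin into ACL-order coordinates.
    std::vector<int> ToAclCoords(const std::vector<unsigned int>& armnnOrigin)
    {
        const std::size_t numDims = armnnOrigin.size();
        std::vector<int> coords(numDims);
        for (std::size_t i = 0; i < numDims; ++i)
        {
            coords[i] = static_cast<int>(armnnOrigin[numDims - i - 1]);
        }
        return coords;
    }

    int main()
    {
        // Splitting along channels at offset 16: ArmNN origin {0, 16, 0, 0}
        // becomes ACL coordinates (x=0, y=0, channel=16, batch=0).
        assert(ToAclCoords({0, 16, 0, 0}) == std::vector<int>({0, 0, 16, 0}));
        return 0;
    }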
- unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1; - coords.set(i, boost::numeric_cast<int>(subTensorOrigin[revertedIndex])); - } - - return std::make_unique<ClSubTensorHandle>( - boost::polymorphic_downcast<IClTensorHandle*>(&parent), shape, coords); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClActivationFloatWorkload, ClActivationUint8Workload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClSoftmaxFloatWorkload, ClSoftmaxUint8Workload>(descriptor, info, - m_MemoryManager.GetIntraLayerManager()); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClSplitterFloatWorkload, ClSplitterUint8Workload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClMergerFloatWorkload, ClMergerUint8Workload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateFullyConnected( - const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<ClFullyConnectedWorkload, ClFullyConnectedWorkload>(descriptor, info, - m_MemoryManager.GetIntraLayerManager()); -} - -std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClPermuteFloatWorkload, ClPermuteUint8Workload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClPooling2dFloatWorkload, ClPooling2dUint8Workload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClConvolution2dFloatWorkload, ClConvolution2dUint8Workload>(descriptor, info, - m_MemoryManager.GetIntraLayerManager()); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthwiseConvolution2d( - const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<ClDepthwiseConvolutionFloatWorkload, ClDepthwiseConvolutionUint8Workload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return 
MakeWorkload<ClAdditionWorkload<armnn::DataType::Float16, armnn::DataType::Float32>, - ClAdditionWorkload<armnn::DataType::QuantisedAsymm8>>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateMultiplication( - const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<ClMultiplicationFloatWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateDivision( - const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<ClDivisionFloatWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClSubtractionWorkload<armnn::DataType::Float16, armnn::DataType::Float32>, - ClSubtractionWorkload<armnn::DataType::QuantisedAsymm8>>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateBatchNormalization( - const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0]) - { - throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemCopy workload"); - } - - return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateResizeBilinear( - const ResizeBilinearQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClResizeBilinearFloatWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFakeQuantization( - const FakeQuantizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClConstantFloatWorkload, ClConstantUint8Workload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClReshapeFloatWorkload, ClReshapeUint8Workload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp16ToFp32( - const ConvertFp16ToFp32QueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return std::make_unique<ClConvertFp16ToFp32Workload>(descriptor, info); -} - -std::unique_ptr<IWorkload> 
ClWorkloadFactory::CreateConvertFp32ToFp16( - const ConvertFp32ToFp16QueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return std::make_unique<ClConvertFp32ToFp16Workload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); -} - -void ClWorkloadFactory::Finalize() -{ - m_MemoryManager.Finalize(); -} - -void ClWorkloadFactory::Release() -{ - m_MemoryManager.Release(); -} - -void ClWorkloadFactory::Acquire() -{ - m_MemoryManager.Acquire(); -} - -#else // #if ARMCOMPUTECL_ENABLED - -ClWorkloadFactory::ClWorkloadFactory() -{ -} - -std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const -{ - return nullptr; -} - -std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, - TensorShape const& subTensorShape, - unsigned int const* subTensorOrigin) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthwiseConvolution2d( - const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMultiplication(const 
MultiplicationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateBatchNormalization( - const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp16ToFp32( - const ConvertFp16ToFp32QueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp32ToFp16( - const ConvertFp32ToFp16QueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -void ClWorkloadFactory::Finalize() -{ -} - -void ClWorkloadFactory::Release() -{ -} - -void ClWorkloadFactory::Acquire() -{ -} - -#endif // #if ARMCOMPUTECL_ENABLED - -} // namespace armnn diff --git a/src/armnn/backends/ClWorkloadFactory.hpp b/src/armnn/backends/ClWorkloadFactory.hpp deleted file mode 100644 index 892d564fbb..0000000000 --- a/src/armnn/backends/ClWorkloadFactory.hpp +++ /dev/null @@ -1,136 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include "OutputHandler.hpp" - -#include "armnn/IRuntime.hpp" -#include <boost/optional.hpp> - -#include "memory/BaseMemoryManager.hpp" - -namespace armnn -{ - -// ARM Compute OpenCL workload factory. 
-class ClWorkloadFactory : public IWorkloadFactory -{ -public: - ClWorkloadFactory(); - - virtual Compute GetCompute() const override { return Compute::GpuAcc; } - - static bool IsLayerSupported(const Layer& layer, boost::optional<DataType> dataType, - std::string& outReasonIfUnsupported); - - virtual bool SupportsSubTensors() const override { return true; } - - virtual std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent, - TensorShape const& subTensorShape, - unsigned int const* subTensorOrigin) const override; - - virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override; - - virtual std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateOutput(const OutputQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateActivation(const ActivationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateSplitter(const SplitterQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateMerger(const MergerQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreatePermute(const PermuteQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateDepthwiseConvolution2d( - const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateNormalization(const NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateAddition(const AdditionQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateConstant(const ConstantQueueDescriptor& descriptor, - const 
WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateLstm(const LstmQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateDivision(const DivisionQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateSubtraction(const SubtractionQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateMean(const MeanQueueDescriptor& descriptor, - const WorkloadInfo& Info) const override; - - virtual std::unique_ptr<IWorkload> CreatePad(const PadQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual void Finalize() override; - - virtual void Release() override; - - virtual void Acquire() override; - -private: - -#ifdef ARMCOMPUTECL_ENABLED - mutable ClMemoryManager m_MemoryManager; -#endif -}; - -} // namespace armnn diff --git a/src/armnn/backends/ClWorkloads.hpp b/src/armnn/backends/ClWorkloads.hpp deleted file mode 100644 index 2bbda8a62b..0000000000 --- a/src/armnn/backends/ClWorkloads.hpp +++ /dev/null @@ -1,40 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once -#include "backends/ClWorkloads/ClActivationFloatWorkload.hpp" -#include "backends/ClWorkloads/ClActivationUint8Workload.hpp" -#include "backends/ClWorkloads/ClAdditionWorkload.hpp" -#include "backends/ClWorkloads/ClBaseConstantWorkload.hpp" -#include "backends/ClWorkloads/ClBaseMergerWorkload.hpp" -#include "backends/ClWorkloads/ClBatchNormalizationFloatWorkload.hpp" -#include "backends/ClWorkloads/ClConstantFloatWorkload.hpp" -#include "backends/ClWorkloads/ClConstantUint8Workload.hpp" -#include "backends/ClWorkloads/ClConvolution2dFloatWorkload.hpp" -#include "backends/ClWorkloads/ClConvolution2dUint8Workload.hpp" -#include "backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.hpp" -#include "backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp" -#include "backends/ClWorkloads/ClDivisionFloatWorkload.hpp" -#include "backends/ClWorkloads/ClFloorFloatWorkload.hpp" -#include "backends/ClWorkloads/ClFullyConnectedWorkload.hpp" -#include "backends/ClWorkloads/ClL2NormalizationFloatWorkload.hpp" -#include "backends/ClWorkloads/ClLstmFloatWorkload.hpp" -#include "backends/ClWorkloads/ClMergerFloatWorkload.hpp" -#include "backends/ClWorkloads/ClMergerUint8Workload.hpp" -#include "backends/ClWorkloads/ClMultiplicationFloatWorkload.hpp" -#include "backends/ClWorkloads/ClNormalizationFloatWorkload.hpp" -#include "backends/ClWorkloads/ClPermuteWorkload.hpp" -#include "backends/ClWorkloads/ClPooling2dFloatWorkload.hpp" -#include "backends/ClWorkloads/ClPooling2dUint8Workload.hpp" -#include "backends/ClWorkloads/ClReshapeFloatWorkload.hpp" -#include "backends/ClWorkloads/ClReshapeUint8Workload.hpp" -#include "backends/ClWorkloads/ClResizeBilinearFloatWorkload.hpp" -#include "backends/ClWorkloads/ClSoftmaxFloatWorkload.hpp" -#include "backends/ClWorkloads/ClSoftmaxUint8Workload.hpp" -#include "backends/ClWorkloads/ClSplitterFloatWorkload.hpp" -#include "backends/ClWorkloads/ClSplitterUint8Workload.hpp" -#include "backends/ClWorkloads/ClSubtractionWorkload.hpp" -#include "backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp" -#include "backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp" diff --git a/src/armnn/backends/ClWorkloads/ClActivationFloatWorkload.cpp b/src/armnn/backends/ClWorkloads/ClActivationFloatWorkload.cpp deleted file mode 100644 index 97078bddd8..0000000000 --- a/src/armnn/backends/ClWorkloads/ClActivationFloatWorkload.cpp +++ /dev/null @@ -1,56 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClActivationFloatWorkload.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/ArmComputeUtils.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const ActivationDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - const arm_compute::ActivationLayerInfo activationLayerInfo = - ConvertActivationDescriptorToAclActivationLayerInfo(descriptor); - - if (input.GetDataType() == DataType::QuantisedAsymm8 && - activationLayerInfo.activation() == arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC) - { - return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, - "CL: Logistic Activations unsupported with QAsymm8 data type."}; - } - - return arm_compute::CLActivationLayer::validate(&aclInput, - &aclOutput, - activationLayerInfo); -} - -ClActivationFloatWorkload::ClActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload<ActivationQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClActivationFloatWorkload", 1, 1); - - const arm_compute::ActivationLayerInfo activationLayerInfo = - ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters); - - arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_ActivationLayer.configure(&input, &output, activationLayerInfo); -} - -void ClActivationFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClActivationFloatWorkload_Execute"); - m_ActivationLayer.run(); -} - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClActivationFloatWorkload.hpp b/src/armnn/backends/ClWorkloads/ClActivationFloatWorkload.hpp deleted file mode 100644 index e1b6fe13d8..0000000000 --- a/src/armnn/backends/ClWorkloads/ClActivationFloatWorkload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ -arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const ActivationDescriptor& descriptor); - -// Activation layer execution. -class ClActivationFloatWorkload : public FloatWorkload<ActivationQueueDescriptor> -{ -public: - ClActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLActivationLayer m_ActivationLayer; -}; - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.cpp deleted file mode 100644 index f39c856aa9..0000000000 --- a/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.cpp +++ /dev/null @@ -1,44 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClActivationUint8Workload.hpp" -#include "backends/ClLayerSupport.hpp" - -#include "backends/ArmComputeUtils.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -ClActivationUint8Workload::ClActivationUint8Workload(const ActivationQueueDescriptor& descriptor, - const WorkloadInfo& info) - : Uint8Workload<ActivationQueueDescriptor>(descriptor, info) -{ - auto activation = ConvertActivationFunctionToAclActivationFunction(m_Data.m_Parameters.m_Function); - arm_compute::ActivationLayerInfo layerInfo(activation, - m_Data.m_Parameters.m_A, - m_Data.m_Parameters.m_B); - - m_Data.ValidateInputsOutputs("ClActivationUint8Workload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_ActivationLayer.configure(&input, &output, layerInfo); -} - -void ClActivationUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClActivationUint8Workload_Execute"); - - m_ActivationLayer.run(); -} - -} //namespace armnn - - diff --git a/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.hpp deleted file mode 100644 index bb2ff58853..0000000000 --- a/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ - -// Activation layer execution. -class ClActivationUint8Workload : public Uint8Workload<ActivationQueueDescriptor> -{ -public: - ClActivationUint8Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLActivationLayer m_ActivationLayer; -}; - -} //namespace armnn - - - diff --git a/src/armnn/backends/ClWorkloads/ClAdditionWorkload.cpp b/src/armnn/backends/ClWorkloads/ClAdditionWorkload.cpp deleted file mode 100644 index dd439d59a9..0000000000 --- a/src/armnn/backends/ClWorkloads/ClAdditionWorkload.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClAdditionWorkload.hpp" - -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/ArmComputeTensorUtils.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; - -template <armnn::DataType... T> -ClAdditionWorkload<T...>::ClAdditionWorkload(const AdditionQueueDescriptor& descriptor, - const WorkloadInfo& info) - : TypedWorkload<AdditionQueueDescriptor, T...>(descriptor, info) -{ - this->m_Data.ValidateInputsOutputs("ClAdditionWorkload", 2, 1); - - arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[1])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor(); - m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy); -} - -template <armnn::DataType... 
T> -void ClAdditionWorkload<T...>::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClAdditionWorkload_Execute"); - m_Layer.run(); -} - -bool ClAdditionValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0); - const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - const arm_compute::Status aclStatus = arm_compute::CLArithmeticAddition::validate(&aclInput0Info, - &aclInput1Info, - &aclOutputInfo, - g_AclConvertPolicy); - - const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); - if (!supported && reasonIfUnsupported) - { - *reasonIfUnsupported = aclStatus.error_description(); - } - - return supported; -} - -} //namespace armnn - -template class armnn::ClAdditionWorkload<armnn::DataType::Float16, armnn::DataType::Float32>; -template class armnn::ClAdditionWorkload<armnn::DataType::QuantisedAsymm8>; diff --git a/src/armnn/backends/ClWorkloads/ClAdditionWorkload.hpp b/src/armnn/backends/ClWorkloads/ClAdditionWorkload.hpp deleted file mode 100644 index b4706890d1..0000000000 --- a/src/armnn/backends/ClWorkloads/ClAdditionWorkload.hpp +++ /dev/null @@ -1,31 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ - -template <armnn::DataType... dataTypes> -class ClAdditionWorkload : public TypedWorkload<AdditionQueueDescriptor, dataTypes...> -{ -public: - ClAdditionWorkload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - mutable arm_compute::CLArithmeticAddition m_Layer; -}; - -bool ClAdditionValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported); -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.cpp b/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.cpp deleted file mode 100644 index 021d17512f..0000000000 --- a/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.cpp +++ /dev/null @@ -1,64 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClBaseConstantWorkload.hpp" -#include "backends/ArmComputeTensorUtils.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "Half.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -template class ClBaseConstantWorkload<DataType::Float16, DataType::Float32>; -template class ClBaseConstantWorkload<DataType::QuantisedAsymm8>; - -template<armnn::DataType... dataTypes> -void ClBaseConstantWorkload<dataTypes...>::Execute() const -{ - // The intermediate tensor held by the corresponding layer output handler can be initialised with the given data - // on the first inference, then reused for subsequent inferences. - // The initialisation cannot happen at workload construction time since the ACL kernel for the next layer may not - // have been configured at the time. 
- if (!m_RanOnce) - { - const ConstantQueueDescriptor& data = this->m_Data; - - BOOST_ASSERT(data.m_LayerOutput != nullptr); - arm_compute::CLTensor& output = static_cast<ClTensorHandle*>(data.m_Outputs[0])->GetTensor(); - arm_compute::DataType computeDataType = static_cast<ClTensorHandle*>(data.m_Outputs[0])->GetDataType(); - - switch (computeDataType) - { - case arm_compute::DataType::F16: - { - CopyArmComputeClTensorData(data.m_LayerOutput->GetConstTensor<Half>(), output); - break; - } - case arm_compute::DataType::F32: - { - CopyArmComputeClTensorData(data.m_LayerOutput->GetConstTensor<float>(), output); - break; - } - case arm_compute::DataType::QASYMM8: - { - CopyArmComputeClTensorData(data.m_LayerOutput->GetConstTensor<uint8_t>(), output); - break; - } - default: - { - BOOST_ASSERT_MSG(false, "Unknown data type"); - break; - } - } - - m_RanOnce = true; - } -} - - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.hpp b/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.hpp deleted file mode 100644 index ca1db389dc..0000000000 --- a/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.hpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ -template <armnn::DataType... DataTypes> -class ClBaseConstantWorkload : public TypedWorkload<ConstantQueueDescriptor, DataTypes...> -{ -public: - ClBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) - : TypedWorkload<ConstantQueueDescriptor, DataTypes...>(descriptor, info) - , m_RanOnce(false) - { - } - - void Execute() const override; - -private: - mutable bool m_RanOnce; -}; - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClBaseMergerWorkload.hpp b/src/armnn/backends/ClWorkloads/ClBaseMergerWorkload.hpp deleted file mode 100644 index 420e074217..0000000000 --- a/src/armnn/backends/ClWorkloads/ClBaseMergerWorkload.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ - -// Base class template providing an implementation of the Merger layer common to all data types. -template <armnn::DataType... DataTypes> -class ClBaseMergerWorkload : public TypedWorkload<MergerQueueDescriptor, DataTypes...> -{ -public: - using TypedWorkload<MergerQueueDescriptor, DataTypes...>::TypedWorkload; - - void Execute() const override - { - // With subtensors, merger is a no-op. - } -}; - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClBaseSplitterWorkload.hpp b/src/armnn/backends/ClWorkloads/ClBaseSplitterWorkload.hpp deleted file mode 100644 index 41f382cac8..0000000000 --- a/src/armnn/backends/ClWorkloads/ClBaseSplitterWorkload.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ - -// Base class template providing an implementation of the Splitter layer common to all data types. -template <armnn::DataType... 
DataTypes> -class ClBaseSplitterWorkload : public TypedWorkload<SplitterQueueDescriptor, DataTypes...> -{ -public: - using TypedWorkload<SplitterQueueDescriptor, DataTypes...>::TypedWorkload; - - void Execute() const override - { - // With subtensors, splitter is a no-op. - } -}; - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.cpp b/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.cpp deleted file mode 100644 index 021734aaa6..0000000000 --- a/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.cpp +++ /dev/null @@ -1,96 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClBatchNormalizationFloatWorkload.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/ArmComputeTensorUtils.hpp" -#include "backends/ClLayerSupport.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& mean, - const TensorInfo& var, - const TensorInfo& beta, - const TensorInfo& gamma, - const BatchNormalizationDescriptor &desc) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - const arm_compute::TensorInfo aclMeanInfo = BuildArmComputeTensorInfo(mean); - const arm_compute::TensorInfo aclVarInfo = BuildArmComputeTensorInfo(var); - const arm_compute::TensorInfo aclBetaInfo = BuildArmComputeTensorInfo(beta); - const arm_compute::TensorInfo aclGammaInfo = BuildArmComputeTensorInfo(gamma); - - return arm_compute::CLBatchNormalizationLayer::validate(&aclInputInfo, - &aclOutputInfo, - &aclMeanInfo, - &aclVarInfo, - &aclBetaInfo, - &aclGammaInfo, - desc.m_Eps); -} - -ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload( - const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) - : FloatWorkload<BatchNormalizationQueueDescriptor>(descriptor, info) -{ - m_Mean = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_Mean, m_Data.m_Mean->GetTensorInfo()); - - m_Variance = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_Variance, m_Data.m_Variance->GetTensorInfo()); - - m_Gamma = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_Gamma, m_Data.m_Gamma->GetTensorInfo()); - - m_Beta = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo()); - - m_Data.ValidateInputsOutputs("ClBatchNormalizationFloatWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_Layer.configure(&input, - &output, - m_Mean.get(), - m_Variance.get(), - m_Beta.get(), - m_Gamma.get(), - m_Data.m_Parameters.m_Eps); - - InitializeArmComputeClTensorDataForFloatTypes(*m_Mean, m_Data.m_Mean); - InitializeArmComputeClTensorDataForFloatTypes(*m_Variance, m_Data.m_Variance); - InitializeArmComputeClTensorDataForFloatTypes(*m_Beta, m_Data.m_Beta); - InitializeArmComputeClTensorDataForFloatTypes(*m_Gamma, m_Data.m_Gamma); - - // Force Compute Library to perform the necessary copying and reshaping, after which - // delete all the input tensors that will no longer be needed - 
m_Layer.prepare(); - FreeUnusedTensors(); -} - -void ClBatchNormalizationFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClBatchNormalizationFloatWorkload_Execute"); - m_Layer.run(); -} - -void ClBatchNormalizationFloatWorkload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_Mean); - FreeTensorIfUnused(m_Variance); - FreeTensorIfUnused(m_Gamma); - FreeTensorIfUnused(m_Beta); -} - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.hpp b/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.hpp deleted file mode 100644 index 22c71b1073..0000000000 --- a/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.hpp +++ /dev/null @@ -1,46 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ - -arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& mean, - const TensorInfo& var, - const TensorInfo& beta, - const TensorInfo& gamma, - const BatchNormalizationDescriptor& desc); - -class ClBatchNormalizationFloatWorkload : public FloatWorkload<BatchNormalizationQueueDescriptor> -{ -public: - ClBatchNormalizationFloatWorkload(const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); - - using FloatWorkload<BatchNormalizationQueueDescriptor>::FloatWorkload; - void Execute() const override; - -private: - mutable arm_compute::CLBatchNormalizationLayer m_Layer; - - std::unique_ptr<arm_compute::CLTensor> m_Mean; - std::unique_ptr<arm_compute::CLTensor> m_Variance; - std::unique_ptr<arm_compute::CLTensor> m_Gamma; - std::unique_ptr<arm_compute::CLTensor> m_Beta; - - void FreeUnusedTensors(); -}; - -} //namespace armnn - - - - diff --git a/src/armnn/backends/ClWorkloads/ClConstantFloatWorkload.cpp b/src/armnn/backends/ClWorkloads/ClConstantFloatWorkload.cpp deleted file mode 100644 index 1565047c22..0000000000 --- a/src/armnn/backends/ClWorkloads/ClConstantFloatWorkload.cpp +++ /dev/null @@ -1,18 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClConstantFloatWorkload.hpp" -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -void ClConstantFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClConstantFloatWorkload_Execute"); - ClBaseConstantWorkload::Execute(); -} - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClConstantFloatWorkload.hpp b/src/armnn/backends/ClWorkloads/ClConstantFloatWorkload.hpp deleted file mode 100644 index 0cbeaad9ea..0000000000 --- a/src/armnn/backends/ClWorkloads/ClConstantFloatWorkload.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "ClBaseConstantWorkload.hpp" - -namespace armnn -{ -class ClConstantFloatWorkload : public ClBaseConstantWorkload<DataType::Float16, DataType::Float32> -{ -public: - using ClBaseConstantWorkload<DataType::Float16, DataType::Float32>::ClBaseConstantWorkload; - void Execute() const override; -}; - - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.cpp deleted file mode 100644 index a5ef0321cd..0000000000 --- a/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.cpp +++ /dev/null @@ -1,18 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClConstantUint8Workload.hpp" -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -void ClConstantUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClConstantUint8Workload_Execute"); - ClBaseConstantWorkload::Execute(); -} - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.hpp deleted file mode 100644 index 30556dc0d6..0000000000 --- a/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "ClBaseConstantWorkload.hpp" - -namespace armnn -{ - -class ClConstantUint8Workload : public ClBaseConstantWorkload<DataType::QuantisedAsymm8> -{ -public: - using ClBaseConstantWorkload<DataType::QuantisedAsymm8>::ClBaseConstantWorkload; - void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClConvertFp16ToFp32Workload.cpp b/src/armnn/backends/ClWorkloads/ClConvertFp16ToFp32Workload.cpp deleted file mode 100644 index 534249aeac..0000000000 --- a/src/armnn/backends/ClWorkloads/ClConvertFp16ToFp32Workload.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClConvertFp16ToFp32Workload.hpp" -#include "backends/ClTensorHandle.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; - -ClConvertFp16ToFp32Workload::ClConvertFp16ToFp32Workload( - const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info) : - Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>(descriptor, info) -{ - this->m_Data.ValidateInputsOutputs("ClConvertFp16ToFp32Workload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor(); - - m_Layer.configure(&input, &output, g_AclConvertPolicy, 0); -} - -void ClConvertFp16ToFp32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp16ToFp32Workload_Execute"); - m_Layer.run(); -} - -arm_compute::Status ClConvertFp16ToFp32WorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - if (input.GetDataType() != DataType::Float16) - { - *reasonIfUnsupported = "Input should be Float16"; - return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported); - } - if (output.GetDataType() != DataType::Float32) - { - *reasonIfUnsupported = "Output should be Float32"; - return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported); - } - - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate( - &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0); - - const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); - if (!supported && reasonIfUnsupported) - { - *reasonIfUnsupported = aclStatus.error_description(); - } - - return aclStatus; -} - - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp b/src/armnn/backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp deleted file mode 100644 index c72d2262b3..0000000000 --- a/src/armnn/backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ - -class ClConvertFp16ToFp32Workload : public Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor> -{ -public: - - ClConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable arm_compute::CLDepthConvertLayer m_Layer; -}; - -arm_compute::Status ClConvertFp16ToFp32WorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported); - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClConvertFp32ToFp16Workload.cpp b/src/armnn/backends/ClWorkloads/ClConvertFp32ToFp16Workload.cpp deleted file mode 100644 index 73b3cbc542..0000000000 --- a/src/armnn/backends/ClWorkloads/ClConvertFp32ToFp16Workload.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClConvertFp32ToFp16Workload.hpp" -#include "backends/ClTensorHandle.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; - -ClConvertFp32ToFp16Workload::ClConvertFp32ToFp16Workload( - const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info) : - Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>(descriptor, info) -{ - this->m_Data.ValidateInputsOutputs("ClConvertFp32ToFp16Workload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor(); - - m_Layer.configure(&input, &output, g_AclConvertPolicy, 0); -} - -void ClConvertFp32ToFp16Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp32ToFp16Workload_Execute"); - m_Layer.run(); -} - -arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - if (input.GetDataType() != DataType::Float32) - { - *reasonIfUnsupported = "Input should be Float32"; - return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported); - } - if (output.GetDataType() != DataType::Float16) - { - *reasonIfUnsupported = "Output should be Float16"; - return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported); - } - - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate( - &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0); - - const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); - if (!supported && reasonIfUnsupported) - { - *reasonIfUnsupported = aclStatus.error_description(); - } - - return aclStatus; -} - - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp b/src/armnn/backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp deleted file mode 100644 index fb6af02070..0000000000 --- a/src/armnn/backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ - -class ClConvertFp32ToFp16Workload : public Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor> -{ -public: - - ClConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable arm_compute::CLDepthConvertLayer m_Layer; -}; - -arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported); - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dBaseWorkload.cpp b/src/armnn/backends/ClWorkloads/ClConvolution2dBaseWorkload.cpp deleted file mode 100644 index 228f17d54e..0000000000 --- a/src/armnn/backends/ClWorkloads/ClConvolution2dBaseWorkload.cpp +++ /dev/null @@ -1,48 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClConvolution2dBaseWorkload.hpp" -#include "backends/ClLayerSupport.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/ArmComputeUtils.hpp" -#include "backends/ArmComputeTensorUtils.hpp" - -#include <arm_compute/runtime/CL/functions/CLConvolutionLayer.h> - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional<TensorInfo>& biases) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights); - - arm_compute::TensorInfo aclBiasesInfo; - arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; - - if (descriptor.m_BiasEnabled) - { - BOOST_ASSERT(biases.is_initialized()); - - aclBiasesInfo = BuildArmComputeTensorInfo(biases.get()); - optionalAclBiasesInfo = &aclBiasesInfo; - } - - arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor); - - return arm_compute::CLConvolutionLayer::validate(&aclInputInfo, - &aclWeightsInfo, - optionalAclBiasesInfo, - &aclOutputInfo, - layerInfo); -} - -} diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dBaseWorkload.hpp b/src/armnn/backends/ClWorkloads/ClConvolution2dBaseWorkload.hpp deleted file mode 100644 index a983dba79a..0000000000 --- a/src/armnn/backends/ClWorkloads/ClConvolution2dBaseWorkload.hpp +++ /dev/null @@ -1,24 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <armnn/Tensor.hpp> -#include <armnn/Descriptors.hpp> - -#include <boost/optional.hpp> - -#include <arm_compute/core/Error.h> - -namespace armnn -{ - -arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional<TensorInfo>& biases); - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dFloatWorkload.cpp b/src/armnn/backends/ClWorkloads/ClConvolution2dFloatWorkload.cpp deleted file mode 100644 index 029f41d5dc..0000000000 --- a/src/armnn/backends/ClWorkloads/ClConvolution2dFloatWorkload.cpp +++ /dev/null @@ -1,81 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClConvolution2dFloatWorkload.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/ArmComputeTensorUtils.hpp" -#include "backends/ClLayerSupport.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -ClConvolution2dFloatWorkload::ClConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) - : FloatWorkload<Convolution2dQueueDescriptor>(descriptor, info) - , m_ConvolutionLayer(memoryManager) -{ - - // todo: check tensor shapes match. 
- const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); - - m_KernelTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_KernelTensor, weightInfo); - - arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, - m_Data.m_Parameters.m_StrideY, - m_Data.m_Parameters.m_PadLeft, - m_Data.m_Parameters.m_PadRight, - m_Data.m_Parameters.m_PadTop, - m_Data.m_Parameters.m_PadBottom, - arm_compute::DimensionRoundingType::FLOOR); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - m_BiasTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); - } - - m_Data.ValidateInputsOutputs("ClConvolution2dFloat32Workload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_ConvolutionLayer.configure(&input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo); - - InitializeArmComputeClTensorDataForFloatTypes(*m_KernelTensor, m_Data.m_Weight); - - if (m_BiasTensor) - { - InitializeArmComputeClTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias); - } - - // Force Compute Library to perform the necessary copying and reshaping, after which - // delete all the input tensors that will no longer be needed - m_ConvolutionLayer.prepare(); - FreeUnusedTensors(); -} - -void ClConvolution2dFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvolution2dFloat32Workload_Execute"); - - m_ConvolutionLayer.run(); -} - -void ClConvolution2dFloatWorkload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_KernelTensor); - FreeTensorIfUnused(m_BiasTensor); -} - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dFloatWorkload.hpp b/src/armnn/backends/ClWorkloads/ClConvolution2dFloatWorkload.hpp deleted file mode 100644 index 28ba53f38a..0000000000 --- a/src/armnn/backends/ClWorkloads/ClConvolution2dFloatWorkload.hpp +++ /dev/null @@ -1,35 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> -#include <arm_compute/runtime/MemoryManagerOnDemand.h> - -#include <memory> - -namespace armnn -{ - -class ClConvolution2dFloatWorkload : public FloatWorkload<Convolution2dQueueDescriptor> -{ -public: - ClConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); - void Execute() const override; - -private: - mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer; - - std::unique_ptr<arm_compute::CLTensor> m_KernelTensor; - std::unique_ptr<arm_compute::CLTensor> m_BiasTensor; - - void FreeUnusedTensors(); -}; - -} //namespace armnn - diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp deleted file mode 100644 index e6783b698a..0000000000 --- a/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp +++ /dev/null @@ -1,81 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClConvolution2dUint8Workload.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/ArmComputeTensorUtils.hpp" -#include "backends/ClLayerSupport.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -ClConvolution2dUint8Workload::ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) - : Uint8Workload<Convolution2dQueueDescriptor>(descriptor, info) - , m_ConvolutionLayer(memoryManager) -{ - // todo: check tensor shapes match - const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); - - m_KernelTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_KernelTensor, weightInfo); - - arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, - m_Data.m_Parameters.m_StrideY, - m_Data.m_Parameters.m_PadLeft, - m_Data.m_Parameters.m_PadRight, - m_Data.m_Parameters.m_PadTop, - m_Data.m_Parameters.m_PadBottom, - arm_compute::DimensionRoundingType::FLOOR); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - m_BiasTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); - } - - m_Data.ValidateInputsOutputs("ClConvolution2dUint8Workload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_ConvolutionLayer.configure(&input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo); - - InitialiseArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight->GetConstTensor<uint8_t>()); - - if (m_BiasTensor) - { - InitialiseArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias->GetConstTensor<int32_t>()); - } - - // Force Compute Library to perform the necessary copying and reshaping, after which - // delete all the input tensors that will no longer be needed - m_ConvolutionLayer.prepare(); - FreeUnusedTensors(); -} - -void ClConvolution2dUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvolution2dUint8Workload_Execute"); - - m_ConvolutionLayer.run(); -} - -void ClConvolution2dUint8Workload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_KernelTensor); - FreeTensorIfUnused(m_BiasTensor); -} - -} //namespace armnn - diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp deleted file mode 100644 index f1f008b1b9..0000000000 --- a/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp +++ /dev/null @@ -1,35 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> -#include <arm_compute/runtime/MemoryManagerOnDemand.h> - -#include <memory> - -namespace armnn -{ - -class ClConvolution2dUint8Workload : public Uint8Workload<Convolution2dQueueDescriptor> -{ -public: - ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); - void Execute() const override; - -private: - mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer; - - std::unique_ptr<arm_compute::CLTensor> m_KernelTensor; - std::unique_ptr<arm_compute::CLTensor> m_BiasTensor; - - void FreeUnusedTensors(); -}; - -} //namespace armnn - diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp deleted file mode 100644 index 0e89a68118..0000000000 --- a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp +++ /dev/null @@ -1,125 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClDepthwiseConvolutionBaseWorkload.hpp" - -#include "TypeUtils.hpp" - -#include "backends/ArmComputeUtils.hpp" -#include "backends/ArmComputeTensorUtils.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" - -namespace armnn -{ - -using namespace armcomputetensorutils; - -arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional<TensorInfo>& biases) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights); - - arm_compute::TensorInfo aclBiasesInfo; - arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; - - if (descriptor.m_BiasEnabled) - { - BOOST_ASSERT(biases.is_initialized()); - - aclBiasesInfo = BuildArmComputeTensorInfo(biases.get()); - optionalAclBiasesInfo = &aclBiasesInfo; - } - - const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor); - const unsigned int aclDepthMultiplier = weights.GetShape()[0]; - - return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo, - &aclWeightsInfo, - optionalAclBiasesInfo, - &aclOutputInfo, - aclPadStrideInfo, - aclDepthMultiplier); -} - -template<armnn::DataType... 
dataTypes> -ClDepthwiseConvolutionBaseWorkload<dataTypes...>::ClDepthwiseConvolutionBaseWorkload( - const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) - : TypedWorkload<DepthwiseConvolution2dQueueDescriptor, dataTypes...>(descriptor, info) -{ - auto& weightInfo = m_Data.m_Weight->GetTensorInfo(); - - m_KernelTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_KernelTensor, weightInfo); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - m_BiasTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); - } - - arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, - m_Data.m_Parameters.m_StrideY, - m_Data.m_Parameters.m_PadLeft, - m_Data.m_Parameters.m_PadRight, - m_Data.m_Parameters.m_PadTop, - m_Data.m_Parameters.m_PadBottom, - arm_compute::DimensionRoundingType::FLOOR); - - std::string name = std::string("ClDepthwiseConvolution") + - GetDataTypeName(m_Data.m_Weight->GetTensorInfo().GetDataType()) + "Workload"; - m_Data.ValidateInputsOutputs(name, 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - const unsigned int depthMultiplier = weightInfo.GetShape()[0]; - - //Check for optimisation opportunities. - bool use3x3Optimisation = (weightInfo.GetShape()[3] == 3) && (weightInfo.GetShape()[2] == 3); - if (use3x3Optimisation) - { - m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer3x3>(); - static_cast<arm_compute::CLDepthwiseConvolutionLayer3x3*>(m_DepthwiseConvolutionLayer.get())->configure( - &input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo, - depthMultiplier); - } - else - { - m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer>(); - static_cast<arm_compute::CLDepthwiseConvolutionLayer*>(m_DepthwiseConvolutionLayer.get())->configure( - &input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo, - depthMultiplier); - } - - BOOST_ASSERT(m_DepthwiseConvolutionLayer); -} - -template<armnn::DataType... dataTypes> -void ClDepthwiseConvolutionBaseWorkload<dataTypes...>::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_KernelTensor); - FreeTensorIfUnused(m_BiasTensor); -} - -// Generate known implementations for linker -template class ClDepthwiseConvolutionBaseWorkload<DataType::Float16, DataType::Float32>; -template class ClDepthwiseConvolutionBaseWorkload<DataType::QuantisedAsymm8>; - -} // namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp deleted file mode 100644 index 49a8b5d357..0000000000 --- a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp +++ /dev/null @@ -1,40 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include <boost/optional.hpp> - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ - -arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional<TensorInfo>& biases); - -template<armnn::DataType... 
dataTypes> -class ClDepthwiseConvolutionBaseWorkload : public TypedWorkload<DepthwiseConvolution2dQueueDescriptor, dataTypes...> -{ -public: - using TypedWorkload<DepthwiseConvolution2dQueueDescriptor, dataTypes...>::m_Data; - - ClDepthwiseConvolutionBaseWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info); - -protected: - std::unique_ptr<arm_compute::IFunction> m_DepthwiseConvolutionLayer; - - std::unique_ptr<arm_compute::CLTensor> m_KernelTensor; - std::unique_ptr<arm_compute::CLTensor> m_BiasTensor; - - void FreeUnusedTensors(); -}; - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.cpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.cpp deleted file mode 100644 index 635ae1f327..0000000000 --- a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.cpp +++ /dev/null @@ -1,39 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClDepthwiseConvolutionFloatWorkload.hpp" - -#include "backends/CpuTensorHandle.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -ClDepthwiseConvolutionFloatWorkload::ClDepthwiseConvolutionFloatWorkload( - const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) - : ClDepthwiseConvolutionBaseWorkload(descriptor, info) -{ - InitializeArmComputeClTensorDataForFloatTypes(*m_KernelTensor, m_Data.m_Weight); - - if (m_BiasTensor) - { - InitializeArmComputeClTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias); - } - - m_DepthwiseConvolutionLayer->prepare(); - FreeUnusedTensors(); -} - -void ClDepthwiseConvolutionFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthwiseConvolutionFloatWorkload_Execute"); - BOOST_ASSERT(m_DepthwiseConvolutionLayer); - - m_DepthwiseConvolutionLayer->run(); -} - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.hpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.hpp deleted file mode 100644 index 4f9d5f332e..0000000000 --- a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.hpp +++ /dev/null @@ -1,26 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "ClDepthwiseConvolutionBaseWorkload.hpp" - -namespace armnn -{ - -class ClDepthwiseConvolutionFloatWorkload : public ClDepthwiseConvolutionBaseWorkload<DataType::Float16, - DataType::Float32> -{ -public: - ClDepthwiseConvolutionFloatWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info); - void Execute() const override; -}; - -} //namespace armnn - - - - diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp deleted file mode 100644 index af5836e908..0000000000 --- a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp +++ /dev/null @@ -1,40 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClDepthwiseConvolutionUint8Workload.hpp" - -#include "backends/CpuTensorHandle.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -ClDepthwiseConvolutionUint8Workload::ClDepthwiseConvolutionUint8Workload( - const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) - : ClDepthwiseConvolutionBaseWorkload(descriptor, info) -{ - InitialiseArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor<uint8_t>()); - - if (m_BiasTensor) - { - InitialiseArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias->template GetConstTensor<int32_t>()); - } - - m_DepthwiseConvolutionLayer->prepare(); - FreeUnusedTensors(); -} - -void ClDepthwiseConvolutionUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthwiseConvolutionUint8Workload_Execute"); - BOOST_ASSERT(m_DepthwiseConvolutionLayer); - - m_DepthwiseConvolutionLayer->run(); -} - -} //namespace armnn - diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp deleted file mode 100644 index b9f676de94..0000000000 --- a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp +++ /dev/null @@ -1,23 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "ClDepthwiseConvolutionBaseWorkload.hpp" - -namespace armnn -{ - -class ClDepthwiseConvolutionUint8Workload : public ClDepthwiseConvolutionBaseWorkload<DataType::QuantisedAsymm8> -{ -public: - ClDepthwiseConvolutionUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info); - void Execute() const override; -}; - -} //namespace armnn - - diff --git a/src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.cpp b/src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.cpp deleted file mode 100644 index 2371789035..0000000000 --- a/src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.cpp +++ /dev/null @@ -1,48 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClDivisionFloatWorkload.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); - const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); - const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - return arm_compute::CLArithmeticDivision::validate(&aclInput1, &aclInput2, &aclOutput); -} - - -ClDivisionFloatWorkload::ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload<DivisionQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClDivisionFloatWorkload", 2, 1); - - arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - // Construct - m_ArithmeticDivision.configure(&input0, &input1, &output); -} - -void ClDivisionFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClDivisionFloatWorkload_Execute"); - - // Executes the layer. - m_ArithmeticDivision.run(); -} - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.hpp b/src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.hpp deleted file mode 100644 index d34e11dab8..0000000000 --- a/src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.hpp +++ /dev/null @@ -1,32 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ - -arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output); - -class ClDivisionFloatWorkload : public FloatWorkload<DivisionQueueDescriptor> -{ -public: - ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor, const - WorkloadInfo& info); - - using FloatWorkload<DivisionQueueDescriptor>::FloatWorkload; - void Execute() const override; - -private: - mutable arm_compute::CLArithmeticDivision m_ArithmeticDivision; -}; - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClFloorFloatWorkload.cpp b/src/armnn/backends/ClWorkloads/ClFloorFloatWorkload.cpp deleted file mode 100644 index d090a7da81..0000000000 --- a/src/armnn/backends/ClWorkloads/ClFloorFloatWorkload.cpp +++ /dev/null @@ -1,31 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClFloorFloatWorkload.hpp" -#include "backends/ClTensorHandle.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -ClFloorFloatWorkload::ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info) - : FloatWorkload<FloorQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClFloorFloatWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_Layer.configure(&input, &output); -} - -void ClFloorFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClFloorFloatWorkload_Execute"); - m_Layer.run(); -} - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClFloorFloatWorkload.hpp b/src/armnn/backends/ClWorkloads/ClFloorFloatWorkload.hpp deleted file mode 100644 index f269bcf30c..0000000000 --- a/src/armnn/backends/ClWorkloads/ClFloorFloatWorkload.hpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ - -class ClFloorFloatWorkload : public FloatWorkload<FloorQueueDescriptor> -{ -public: - ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - mutable arm_compute::CLFloor m_Layer; -}; - -} //namespace armnn - - - - diff --git a/src/armnn/backends/ClWorkloads/ClFullyConnectedWorkload.cpp b/src/armnn/backends/ClWorkloads/ClFullyConnectedWorkload.cpp deleted file mode 100644 index 8d2fd0e909..0000000000 --- a/src/armnn/backends/ClWorkloads/ClFullyConnectedWorkload.cpp +++ /dev/null @@ -1,111 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClFullyConnectedWorkload.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/ArmComputeTensorUtils.hpp" -#include "backends/ArmComputeUtils.hpp" -#include "backends/ClLayerSupport.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& weights, - const TensorInfo& biases, - const FullyConnectedDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output); - const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights); - - arm_compute::TensorInfo aclBiases; - arm_compute::TensorInfo *optionalAclBiases = nullptr; - if (descriptor.m_BiasEnabled) - { - aclBiases = BuildArmComputeTensorInfo(biases); - optionalAclBiases = &aclBiases; - } - - const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo = - ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor); - - return arm_compute::CLFullyConnectedLayer::validate(&aclInput, - &aclWeights, - optionalAclBiases, - &aclOutput, - fullyConnectedLayerInfo); -} - -ClFullyConnectedWorkload::ClFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor, - const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) - : BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info) - , m_FullyConnectedLayer(memoryManager) -{ - m_WeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo()); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - m_BiasesTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo()); - } - - m_Data.ValidateInputsOutputs("ClFullyConnectedWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - // Construct - arm_compute::FullyConnectedLayerInfo fc_info; - fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix; - m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info); - - // Allocate - if (m_Data.m_Weight->GetTensorInfo().GetDataType() == DataType::QuantisedAsymm8) - { - InitialiseArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight->GetConstTensor<uint8_t>()); - } - else - { - InitializeArmComputeClTensorDataForFloatTypes(*m_WeightsTensor, m_Data.m_Weight); - } - - if (m_BiasesTensor) - { - if (m_Data.m_Bias->GetTensorInfo().GetDataType() == DataType::Signed32) - { - InitialiseArmComputeClTensorData(*m_BiasesTensor, m_Data.m_Bias->GetConstTensor<int32_t>()); - } - else - { - InitializeArmComputeClTensorDataForFloatTypes(*m_BiasesTensor, m_Data.m_Bias); - } - } - - // Force Compute Library to perform the necessary copying and reshaping, after which - // delete all the input tensors that will no longer be needed - m_FullyConnectedLayer.prepare(); - FreeUnusedTensors(); -} - -void ClFullyConnectedWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClFullyConnectedWorkload_Execute"); - m_FullyConnectedLayer.run(); -} - -void ClFullyConnectedWorkload::FreeUnusedTensors() -{ - 
FreeTensorIfUnused(m_WeightsTensor); - FreeTensorIfUnused(m_BiasesTensor); -} - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClFullyConnectedWorkload.hpp b/src/armnn/backends/ClWorkloads/ClFullyConnectedWorkload.hpp deleted file mode 100644 index a61610992e..0000000000 --- a/src/armnn/backends/ClWorkloads/ClFullyConnectedWorkload.hpp +++ /dev/null @@ -1,43 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> -#include <arm_compute/runtime/MemoryManagerOnDemand.h> - -#include <memory> - -namespace armnn -{ - -arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& weights, - const TensorInfo& biases, - const FullyConnectedDescriptor& descriptor); - -class ClFullyConnectedWorkload : public armnn::BaseWorkload<armnn::FullyConnectedQueueDescriptor> -{ -public: - ClFullyConnectedWorkload(const armnn::FullyConnectedQueueDescriptor& descriptor, - const armnn::WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); - - using armnn::BaseWorkload<armnn::FullyConnectedQueueDescriptor>::m_Data; - void Execute() const override; - -private: - mutable arm_compute::CLFullyConnectedLayer m_FullyConnectedLayer; - - std::unique_ptr<arm_compute::CLTensor> m_WeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_BiasesTensor; - - void FreeUnusedTensors(); -}; - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClL2NormalizationFloatWorkload.cpp b/src/armnn/backends/ClWorkloads/ClL2NormalizationFloatWorkload.cpp deleted file mode 100644 index 4ccaae3430..0000000000 --- a/src/armnn/backends/ClWorkloads/ClL2NormalizationFloatWorkload.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClL2NormalizationFloatWorkload.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/ArmComputeUtils.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output); - - arm_compute::NormalizationLayerInfo normalizationInfo = - CreateAclNormalizationLayerInfoForL2Normalization(input); - - return arm_compute::CLNormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo); -} - -ClL2NormalizationFloatWorkload::ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload<L2NormalizationQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClL2NormalizationFloatWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_Layer.configure(&input, &output, CreateAclNormalizationLayerInfoForL2Normalization(info.m_InputTensorInfos[0])); -} - -void ClL2NormalizationFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClL2NormalizationFloatWorkload_Execute"); - m_Layer.run(); -} - -} //namespace armnn - - - diff --git a/src/armnn/backends/ClWorkloads/ClL2NormalizationFloatWorkload.hpp b/src/armnn/backends/ClWorkloads/ClL2NormalizationFloatWorkload.hpp deleted file mode 100644 index f3f7de110a..0000000000 --- a/src/armnn/backends/ClWorkloads/ClL2NormalizationFloatWorkload.hpp +++ /dev/null @@ -1,34 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ - -arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output); - -class ClL2NormalizationFloatWorkload : public FloatWorkload<L2NormalizationQueueDescriptor> -{ -public: - ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - // Purposely not a CLL2Normalize function. See constructor. - mutable arm_compute::CLNormalizationLayer m_Layer; -}; - -} //namespace armnn - - - - diff --git a/src/armnn/backends/ClWorkloads/ClLstmFloatWorkload.cpp b/src/armnn/backends/ClWorkloads/ClLstmFloatWorkload.cpp deleted file mode 100644 index 09a34c2d02..0000000000 --- a/src/armnn/backends/ClWorkloads/ClLstmFloatWorkload.cpp +++ /dev/null @@ -1,408 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClLstmFloatWorkload.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/ArmComputeTensorUtils.hpp" -#include "backends/ClLayerSupport.hpp" - -#include <arm_compute/runtime/CL/functions/CLLSTMLayer.h> - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info) - : FloatWorkload<LstmQueueDescriptor>(descriptor, info) -{ - arm_compute::LSTMParams<arm_compute::ICLTensor> lstm_param; - - // Basic parameters - m_InputToForgetWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights->GetTensorInfo()); - - m_InputToCellWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_InputToCellWeightsTensor, m_Data.m_InputToCellWeights->GetTensorInfo()); - - m_InputToOutputWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_InputToOutputWeightsTensor, m_Data.m_InputToOutputWeights->GetTensorInfo()); - - m_RecurrentToForgetWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_RecurrentToForgetWeightsTensor, m_Data.m_RecurrentToForgetWeights->GetTensorInfo()); - - m_RecurrentToCellWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_RecurrentToCellWeightsTensor, m_Data.m_RecurrentToCellWeights->GetTensorInfo()); - - m_RecurrentToOutputWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_RecurrentToOutputWeightsTensor, m_Data.m_RecurrentToOutputWeights->GetTensorInfo()); - - m_ForgetGateBiasTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_ForgetGateBiasTensor, m_Data.m_ForgetGateBias->GetTensorInfo()); - - m_CellBiasTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_CellBiasTensor, m_Data.m_CellBias->GetTensorInfo()); - - m_OutputGateBiasTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_OutputGateBiasTensor, m_Data.m_OutputGateBias->GetTensorInfo()); - - // for future reference: check the AndroidNN API for the logic here - if (!m_Data.m_Parameters.m_CifgEnabled) - { - m_InputToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_InputToInputWeightsTensor, m_Data.m_InputToInputWeights->GetTensorInfo()); - - m_RecurrentToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_RecurrentToInputWeightsTensor, m_Data.m_RecurrentToInputWeights->GetTensorInfo()); - - m_CellToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - if (m_Data.m_CellToInputWeights != nullptr) - { - BuildArmComputeTensor(*m_CellToInputWeightsTensor, m_Data.m_CellToInputWeights->GetTensorInfo()); - } - - m_InputGateBiasTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_InputGateBiasTensor, m_Data.m_InputGateBias->GetTensorInfo()); - - lstm_param.set_cifg_params(m_InputToInputWeightsTensor.get(), - m_RecurrentToInputWeightsTensor.get(), - m_Data.m_CellToInputWeights != nullptr ? 
m_CellToInputWeightsTensor.get() : nullptr, - m_InputGateBiasTensor.get()); - } - - if (m_Data.m_Parameters.m_ProjectionEnabled) - { - m_ProjectionWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_ProjectionWeightsTensor, m_Data.m_ProjectionWeights->GetTensorInfo()); - - m_ProjectionBiasTensor = std::make_unique<arm_compute::CLTensor>(); - if (m_Data.m_ProjectionBias != nullptr) - { - BuildArmComputeTensor(*m_ProjectionBiasTensor, m_Data.m_ProjectionBias->GetTensorInfo()); - } - - lstm_param.set_projection_params(m_ProjectionWeightsTensor.get(), - m_Data.m_ProjectionBias != nullptr ? m_ProjectionBiasTensor.get() : nullptr); - } - - if (m_Data.m_Parameters.m_PeepholeEnabled) - { - m_CellToForgetWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_CellToForgetWeightsTensor, m_Data.m_CellToForgetWeights->GetTensorInfo()); - - m_CellToOutputWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_CellToOutputWeightsTensor, m_Data.m_CellToOutputWeights->GetTensorInfo()); - - lstm_param.set_peephole_params(m_CellToForgetWeightsTensor.get(), m_CellToOutputWeightsTensor.get()); - } - - const arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - const arm_compute::ICLTensor& output_state_in = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); - const arm_compute::ICLTensor& cell_state_in = static_cast<IClTensorHandle*>(m_Data.m_Inputs[2])->GetTensor(); - - arm_compute::ICLTensor& output_state_out = static_cast<IClTensorHandle*>(m_Data.m_Outputs[1])->GetTensor(); - arm_compute::ICLTensor& cell_state_out = static_cast<IClTensorHandle*>(m_Data.m_Outputs[2])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[3])->GetTensor(); - - // Get the batch_size and the num_units from the cellStateIn dimensions - const TensorInfo& inputTensorInfo = info.m_InputTensorInfos[2]; - const unsigned int batch_size = boost::numeric_cast<unsigned int>(inputTensorInfo.GetShape()[0]); - const unsigned int num_units = boost::numeric_cast<unsigned int>(inputTensorInfo.GetShape()[1]); - - m_ScratchBuffer = std::make_unique<arm_compute::CLTensor>(); - if (m_Data.m_Parameters.m_CifgEnabled) - { - // 2D tensor with dimensions [num_units * 3, batch_size] with CIFG (the input gate is coupled to the forget gate, so only 3 gates need scratch space) - armnn::TensorInfo scratchBuffer1({ batch_size, num_units * 3 }, DataType::Float32); - BuildArmComputeTensor(*m_ScratchBuffer, scratchBuffer1); - } - else - { - // scratch_buffer [num_units * 4, batch_size] without CIFG (all 4 gates need scratch space) - armnn::TensorInfo scratchBuffer2({ batch_size, num_units * 4 }, DataType::Float32); - BuildArmComputeTensor(*m_ScratchBuffer, scratchBuffer2); - } - - float cell_threshold = m_Data.m_Parameters.m_ClippingThresCell; - float projection_threshold = m_Data.m_Parameters.m_ClippingThresProj; - - // for preparing the object for the class ActivationLayerInfo, we need to consider 5 situations - arm_compute::ActivationLayerInfo activationLayerInfo; - if (m_Data.m_Parameters.m_ActivationFunc == 0) - { - // no activation, do nothing - } - else if (m_Data.m_Parameters.m_ActivationFunc == 1) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::RELU); - } - else if (m_Data.m_Parameters.m_ActivationFunc == 3) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0); - } - else if (m_Data.m_Parameters.m_ActivationFunc == 4) - { - 
activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0); - } - else if (m_Data.m_Parameters.m_ActivationFunc == 6) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC); - } - else - { - throw armnn::Exception("Wrong Type of Activation Function!"); - } - - - m_LstmLayer.configure(&input, m_InputToForgetWeightsTensor.get(), m_InputToCellWeightsTensor.get(), - m_InputToOutputWeightsTensor.get(), m_RecurrentToForgetWeightsTensor.get(), - m_RecurrentToCellWeightsTensor.get(), m_RecurrentToOutputWeightsTensor.get(), - m_ForgetGateBiasTensor.get(), m_CellBiasTensor.get(), m_OutputGateBiasTensor.get(), - &output_state_in, &cell_state_in, m_ScratchBuffer.get(), &output_state_out, - &cell_state_out, &output, lstm_param, activationLayerInfo, - cell_threshold, projection_threshold); - - armcomputetensorutils::InitialiseArmComputeTensorEmpty(*m_ScratchBuffer); - - InitialiseArmComputeClTensorData(*m_InputToForgetWeightsTensor, - m_Data.m_InputToForgetWeights->GetConstTensor<float>()); - InitialiseArmComputeClTensorData(*m_InputToCellWeightsTensor, - m_Data.m_InputToCellWeights->GetConstTensor<float>()); - InitialiseArmComputeClTensorData(*m_InputToOutputWeightsTensor, - m_Data.m_InputToOutputWeights->GetConstTensor<float>()); - InitialiseArmComputeClTensorData(*m_RecurrentToForgetWeightsTensor, - m_Data.m_RecurrentToForgetWeights->GetConstTensor<float>()); - InitialiseArmComputeClTensorData(*m_RecurrentToCellWeightsTensor, - m_Data.m_RecurrentToCellWeights->GetConstTensor<float>()); - InitialiseArmComputeClTensorData(*m_RecurrentToOutputWeightsTensor, - m_Data.m_RecurrentToOutputWeights->GetConstTensor<float>()); - InitialiseArmComputeClTensorData(*m_ForgetGateBiasTensor, - m_Data.m_ForgetGateBias->GetConstTensor<float>()); - InitialiseArmComputeClTensorData(*m_CellBiasTensor, - m_Data.m_CellBias->GetConstTensor<float>()); - InitialiseArmComputeClTensorData(*m_OutputGateBiasTensor, - m_Data.m_OutputGateBias->GetConstTensor<float>()); - - if (!m_Data.m_Parameters.m_CifgEnabled) - { - InitialiseArmComputeClTensorData(*m_InputToInputWeightsTensor, - m_Data.m_InputToInputWeights->GetConstTensor<float>()); - InitialiseArmComputeClTensorData(*m_RecurrentToInputWeightsTensor, - m_Data.m_RecurrentToInputWeights->GetConstTensor<float>()); - if (m_Data.m_CellToInputWeights != nullptr) - { - InitialiseArmComputeClTensorData(*m_CellToInputWeightsTensor, - m_Data.m_CellToInputWeights->GetConstTensor<float>()); - } - InitialiseArmComputeClTensorData(*m_InputGateBiasTensor, - m_Data.m_InputGateBias->GetConstTensor<float>()); - } - - if (m_Data.m_Parameters.m_ProjectionEnabled) - { - InitialiseArmComputeClTensorData(*m_ProjectionWeightsTensor, - m_Data.m_ProjectionWeights->GetConstTensor<float>()); - if (m_Data.m_ProjectionBias != nullptr) - { - InitialiseArmComputeClTensorData(*m_ProjectionBiasTensor, - m_Data.m_ProjectionBias->GetConstTensor<float>()); - } - } - - if (m_Data.m_Parameters.m_PeepholeEnabled) - { - InitialiseArmComputeClTensorData(*m_CellToForgetWeightsTensor, - m_Data.m_CellToForgetWeights->GetConstTensor<float>()); - InitialiseArmComputeClTensorData(*m_CellToOutputWeightsTensor, - m_Data.m_CellToOutputWeights->GetConstTensor<float>()); - } - - // Force Compute Library to perform the necessary copying and reshaping, after which - // delete all the input tensors that will no longer be needed - m_LstmLayer.prepare(); - FreeUnusedTensors(); -} - -void 
ClLstmFloatWorkload::Execute() const -{ - m_LstmLayer.run(); -} - -arm_compute::Status ClLstmFloatWorkloadValidate(const TensorInfo& input, const TensorInfo& outputStateIn, - const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, - const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, - const TensorInfo& output, const LstmDescriptor& descriptor, - const TensorInfo& inputToForgetWeights, - const TensorInfo& inputToCellWeights, - const TensorInfo& inputToOutputWeights, - const TensorInfo& recurrentToForgetWeights, - const TensorInfo& recurrentToCellWeights, - const TensorInfo& recurrentToOutputWeights, - const TensorInfo& forgetGateBias, const TensorInfo& cellBias, - const TensorInfo& outputGateBias, - const TensorInfo* inputToInputWeights, - const TensorInfo* recurrentToInputWeights, - const TensorInfo* cellToInputWeights, - const TensorInfo* inputGateBias, - const TensorInfo* projectionWeights, - const TensorInfo* projectionBias, - const TensorInfo* cellToForgetWeights, - const TensorInfo* cellToOutputWeights) -{ - arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info; - - // The inputs and the outputs - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn); - const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn); - const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer); - const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut); - const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - // Basic parameters - const arm_compute::TensorInfo aclInputToForgetWeightsInfo = BuildArmComputeTensorInfo(inputToForgetWeights); - const arm_compute::TensorInfo aclInputToCellWeightsInfo = BuildArmComputeTensorInfo(inputToCellWeights); - const arm_compute::TensorInfo aclInputToOutputWeightsInfo = BuildArmComputeTensorInfo(inputToOutputWeights); - const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo - = BuildArmComputeTensorInfo(recurrentToForgetWeights); - const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo - = BuildArmComputeTensorInfo(recurrentToCellWeights); - const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo - = BuildArmComputeTensorInfo(recurrentToOutputWeights); - const arm_compute::TensorInfo aclForgetGateBiasInfo = BuildArmComputeTensorInfo(forgetGateBias); - const arm_compute::TensorInfo aclCellBiasInfo = BuildArmComputeTensorInfo(cellBias); - const arm_compute::TensorInfo aclOutputGateBiasInfo = BuildArmComputeTensorInfo(outputGateBias); - - arm_compute::TensorInfo aclInputToInputWeightsInfo; - arm_compute::TensorInfo aclRecurrentToInputWeightsInfo; - arm_compute::TensorInfo aclCellToInputWeightsInfo; - arm_compute::TensorInfo aclInputGateBiasInfo; - arm_compute::TensorInfo aclProjectionWeightsInfo; - arm_compute::TensorInfo aclProjectionBiasInfo; - arm_compute::TensorInfo aclCellToForgetWeightsInfo; - arm_compute::TensorInfo aclCellToOutputWeightsInfo; - - if (!descriptor.m_CifgEnabled) - { - armnn::TensorInfo inputToInputWInfo = *inputToInputWeights; - aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(inputToInputWInfo); - armnn::TensorInfo recurrentToInputWInfo = *recurrentToInputWeights; - aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(recurrentToInputWInfo); - 
- if (cellToInputWeights != nullptr) - { - armnn::TensorInfo cellToInputWInfo = *cellToInputWeights; - aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(cellToInputWInfo); - } - armnn::TensorInfo inputGateBiasInfo = *inputGateBias; - aclInputGateBiasInfo = BuildArmComputeTensorInfo(inputGateBiasInfo); - lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo, &aclRecurrentToInputWeightsInfo, - cellToInputWeights != nullptr ? &aclCellToInputWeightsInfo: nullptr, - &aclInputGateBiasInfo); - } - - if (descriptor.m_ProjectionEnabled) - { - const armnn::TensorInfo& projectionWInfo = *projectionWeights; - aclProjectionWeightsInfo = BuildArmComputeTensorInfo(projectionWInfo); - - if (projectionBias != nullptr) - { - const armnn::TensorInfo& projectionBiasInfo = *projectionBias; - aclProjectionBiasInfo = BuildArmComputeTensorInfo(projectionBiasInfo); - } - lstm_params_info.set_projection_params(&aclProjectionWeightsInfo, - projectionBias != nullptr ? &aclProjectionBiasInfo: nullptr); - } - - if (descriptor.m_PeepholeEnabled) - { - const armnn::TensorInfo& cellToForgetWInfo = *cellToForgetWeights; - aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(cellToForgetWInfo); - const armnn::TensorInfo& cellToOutputWInfo = *cellToOutputWeights; - aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(cellToOutputWInfo); - lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo); - } - - float cell_threshold = descriptor.m_ClippingThresCell; - float projection_threshold = descriptor.m_ClippingThresProj; - - // for preparing the object for the class ActivationLayerInfo, we need to consider 5 situations - arm_compute::ActivationLayerInfo activationLayerInfo; - if (descriptor.m_ActivationFunc == 0) - { - // no activation, do nothing - } - else if (descriptor.m_ActivationFunc == 1) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::RELU); - } - else if (descriptor.m_ActivationFunc == 3) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0); - } - else if (descriptor.m_ActivationFunc == 4) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0); - } - else if (descriptor.m_ActivationFunc == 6) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC); - } - else - { - throw armnn::Exception("Wrong Type of Activation Function!"); - } - - return arm_compute::CLLSTMLayer::validate(&aclInputInfo, &aclInputToForgetWeightsInfo, - &aclInputToCellWeightsInfo, - &aclInputToOutputWeightsInfo, - &aclRecurrentToForgetWeightsInfo, - &aclRecurrentToCellWeightsInfo, - &aclRecurrentToOutputWeightsInfo, - &aclForgetGateBiasInfo, - &aclCellBiasInfo, - &aclOutputGateBiasInfo, - &aclOutputStateInInfo, &aclCellStateInInfo, - &aclScratchBufferInfo, &aclOutputStateOutInfo, - &aclCellStateOutInfo, &aclOutputInfo, - lstm_params_info, activationLayerInfo, - cell_threshold, projection_threshold); -} - -void ClLstmFloatWorkload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_InputToInputWeightsTensor); - FreeTensorIfUnused(m_InputToForgetWeightsTensor); - FreeTensorIfUnused(m_InputToCellWeightsTensor); - FreeTensorIfUnused(m_InputToOutputWeightsTensor); - FreeTensorIfUnused(m_RecurrentToInputWeightsTensor); - FreeTensorIfUnused(m_RecurrentToForgetWeightsTensor); - 
FreeTensorIfUnused(m_RecurrentToCellWeightsTensor); - FreeTensorIfUnused(m_RecurrentToOutputWeightsTensor); - FreeTensorIfUnused(m_CellToInputWeightsTensor); - FreeTensorIfUnused(m_CellToForgetWeightsTensor); - FreeTensorIfUnused(m_CellToOutputWeightsTensor); - FreeTensorIfUnused(m_InputGateBiasTensor); - FreeTensorIfUnused(m_ForgetGateBiasTensor); - FreeTensorIfUnused(m_CellBiasTensor); - FreeTensorIfUnused(m_OutputGateBiasTensor); - FreeTensorIfUnused(m_ProjectionWeightsTensor); - FreeTensorIfUnused(m_ProjectionBiasTensor); - FreeTensorIfUnused(m_ScratchBuffer); -} - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClLstmFloatWorkload.hpp b/src/armnn/backends/ClWorkloads/ClLstmFloatWorkload.hpp deleted file mode 100644 index 61d8fc3e6c..0000000000 --- a/src/armnn/backends/ClWorkloads/ClLstmFloatWorkload.hpp +++ /dev/null @@ -1,68 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ - -class ClLstmFloatWorkload : public FloatWorkload<LstmQueueDescriptor> -{ -public: - ClLstmFloatWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLLSTMLayer m_LstmLayer; - - std::unique_ptr<arm_compute::CLTensor> m_InputToInputWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_InputToForgetWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_InputToCellWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_InputToOutputWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_RecurrentToInputWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_RecurrentToForgetWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_RecurrentToCellWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_RecurrentToOutputWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_CellToInputWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_CellToForgetWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_CellToOutputWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_InputGateBiasTensor; - std::unique_ptr<arm_compute::CLTensor> m_ForgetGateBiasTensor; - std::unique_ptr<arm_compute::CLTensor> m_CellBiasTensor; - std::unique_ptr<arm_compute::CLTensor> m_OutputGateBiasTensor; - std::unique_ptr<arm_compute::CLTensor> m_ProjectionWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_ProjectionBiasTensor; - - std::unique_ptr<arm_compute::CLTensor> m_ScratchBuffer; - - void FreeUnusedTensors(); -}; - -arm_compute::Status ClLstmFloatWorkloadValidate(const TensorInfo& input, const TensorInfo& outputStateIn, - const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, - const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, - const TensorInfo& output, const LstmDescriptor &descriptor, - const TensorInfo& inputToForgetWeights, - const TensorInfo& inputToCellWeights, - const TensorInfo& inputToOutputWeights, - const TensorInfo& recurrentToForgetWeights, - const TensorInfo& recurrentToCellWeights, - const TensorInfo& recurrentToOutputWeights, - const TensorInfo& forgetGateBias, const TensorInfo& cellBias, - const TensorInfo& outputGateBias, - const TensorInfo* inputToInputWeights, - const TensorInfo* recurrentToInputWeights, - const TensorInfo* cellToInputWeights, - const TensorInfo* inputGateBias, - const TensorInfo* projectionWeights, - const TensorInfo* 
projectionBias, - const TensorInfo* cellToForgetWeights, - const TensorInfo* cellToOutputWeights); -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClMergerFloatWorkload.cpp b/src/armnn/backends/ClWorkloads/ClMergerFloatWorkload.cpp deleted file mode 100644 index 151f1e0ee7..0000000000 --- a/src/armnn/backends/ClWorkloads/ClMergerFloatWorkload.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClMergerFloatWorkload.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -void ClMergerFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClMergerFloatWorkload_Execute"); - ClBaseMergerWorkload::Execute(); -} - -} //namespace armnn - diff --git a/src/armnn/backends/ClWorkloads/ClMergerFloatWorkload.hpp b/src/armnn/backends/ClWorkloads/ClMergerFloatWorkload.hpp deleted file mode 100644 index 9782f7a8f3..0000000000 --- a/src/armnn/backends/ClWorkloads/ClMergerFloatWorkload.hpp +++ /dev/null @@ -1,22 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "ClBaseMergerWorkload.hpp" - -namespace armnn -{ - -class ClMergerFloatWorkload : public ClBaseMergerWorkload<DataType::Float16, DataType::Float32> -{ -public: - using ClBaseMergerWorkload<DataType::Float16, DataType::Float32>::ClBaseMergerWorkload; - virtual void Execute() const override; -}; - -} //namespace armnn - - diff --git a/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.cpp deleted file mode 100644 index 9d1060d857..0000000000 --- a/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClMergerUint8Workload.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -void ClMergerUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClMergerUint8Workload_Execute"); - ClBaseMergerWorkload<DataType::QuantisedAsymm8>::Execute(); -} - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.hpp deleted file mode 100644 index cbfc19a0f2..0000000000 --- a/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "ClBaseMergerWorkload.hpp" - -namespace armnn -{ - -class ClMergerUint8Workload : public ClBaseMergerWorkload<armnn::DataType::QuantisedAsymm8> -{ -public: - using ClBaseMergerWorkload<armnn::DataType::QuantisedAsymm8>::ClBaseMergerWorkload; - virtual void Execute() const override; -}; - -} //namespace armnn - diff --git a/src/armnn/backends/ClWorkloads/ClMultiplicationFloatWorkload.cpp b/src/armnn/backends/ClWorkloads/ClMultiplicationFloatWorkload.cpp deleted file mode 100644 index c3330a98e8..0000000000 --- a/src/armnn/backends/ClWorkloads/ClMultiplicationFloatWorkload.cpp +++ /dev/null @@ -1,60 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClMultiplicationFloatWorkload.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); - const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); - const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it, - // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be - // ignored for F32 tensors. - return arm_compute::CLPixelWiseMultiplication::validate(&aclInput1, - &aclInput2, - &aclOutput, - 1.0f, - arm_compute::ConvertPolicy::SATURATE, - arm_compute::RoundingPolicy::TO_ZERO); -} - - -ClMultiplicationFloatWorkload::ClMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload<MultiplicationQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClMultiplicationFloatWorkload", 2, 1); - - arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - // Construct - m_PixelWiseMultiplication.configure(&input0, - &input1, - &output, - 1.0f, - arm_compute::ConvertPolicy::SATURATE, - arm_compute::RoundingPolicy::TO_NEAREST_EVEN); -} - -void ClMultiplicationFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClMultiplicationFloatWorkload_Execute"); - - // Executes the layer. - m_PixelWiseMultiplication.run(); -} - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClMultiplicationFloatWorkload.hpp b/src/armnn/backends/ClWorkloads/ClMultiplicationFloatWorkload.hpp deleted file mode 100644 index c2d6b7697a..0000000000 --- a/src/armnn/backends/ClWorkloads/ClMultiplicationFloatWorkload.hpp +++ /dev/null @@ -1,34 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ - -arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output); - -class ClMultiplicationFloatWorkload : public FloatWorkload<MultiplicationQueueDescriptor> -{ -public: - ClMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info); - - using FloatWorkload<MultiplicationQueueDescriptor>::FloatWorkload; - void Execute() const override; - -private: - mutable arm_compute::CLPixelWiseMultiplication m_PixelWiseMultiplication; -}; - -} //namespace armnn - - - diff --git a/src/armnn/backends/ClWorkloads/ClNormalizationFloatWorkload.cpp b/src/armnn/backends/ClWorkloads/ClNormalizationFloatWorkload.cpp deleted file mode 100644 index d2625354ef..0000000000 --- a/src/armnn/backends/ClWorkloads/ClNormalizationFloatWorkload.cpp +++ /dev/null @@ -1,50 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClNormalizationFloatWorkload.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/ClLayerSupport.hpp" -#include "backends/ArmComputeUtils.hpp" -#include "backends/ArmComputeTensorUtils.hpp" -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input, const TensorInfo& output, - const NormalizationDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - arm_compute::NormalizationLayerInfo layerInfo = - armcomputetensorutils::BuildArmComputeNormalizationLayerInfo(descriptor); - - return arm_compute::CLNormalizationLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo); -} - -ClNormalizationFloatWorkload::ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload<NormalizationQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClNormalizationFloatWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - arm_compute::NormalizationLayerInfo normalizationInfo = - armcomputetensorutils::BuildArmComputeNormalizationLayerInfo(m_Data.m_Parameters); - - m_NormalizationLayer.configure(&input, &output, normalizationInfo); -}; - -void ClNormalizationFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClNormalizationFloatWorkload_Execute"); - m_NormalizationLayer.run(); -} - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClNormalizationFloatWorkload.hpp b/src/armnn/backends/ClWorkloads/ClNormalizationFloatWorkload.hpp deleted file mode 100644 index f02d0adb70..0000000000 --- a/src/armnn/backends/ClWorkloads/ClNormalizationFloatWorkload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ - -arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const NormalizationDescriptor& descriptor); - -class ClNormalizationFloatWorkload : public FloatWorkload<NormalizationQueueDescriptor> -{ -public: - ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLNormalizationLayer m_NormalizationLayer; -}; - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClPermuteWorkload.cpp b/src/armnn/backends/ClWorkloads/ClPermuteWorkload.cpp deleted file mode 100644 index 29d98bf0eb..0000000000 --- a/src/armnn/backends/ClWorkloads/ClPermuteWorkload.cpp +++ /dev/null @@ -1,56 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClPermuteWorkload.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/ArmComputeTensorUtils.hpp" - -#include <arm_compute/core/Error.h> - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descriptor) -{ - const armnn::PermutationVector& perm = descriptor.m_DimMappings; - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(!perm.IsEqual({ 0U, 3U, 1U, 2U }) - && !perm.IsEqual({ 0U, 2U, 3U, 1U }) - && !perm.IsEqual({ 3U, 2U, 0U, 1U }), - "Only [0, 3, 1, 2], [0, 2, 3, 1] and [3, 2, 0, 1] permutations are supported"); - - return arm_compute::Status{}; -} - -template <armnn::DataType... DataTypes> -ClPermuteWorkload<DataTypes...>::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, - const WorkloadInfo& info) - : TypedWorkload<PermuteQueueDescriptor, DataTypes...>(descriptor, info) -{ - using armcomputetensorutils::BuildArmComputePermutationVector; - - m_Data.ValidateInputsOutputs(GetName(), 1, 1); - - const arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings; - - // Run the layer. - m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings)); -} - -template <armnn::DataType... DataTypes> -void ClPermuteWorkload<DataTypes...>::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL( GetName() + "_Execute"); - m_PermuteFunction.run(); -} - -template class ClPermuteWorkload<DataType::Float16, DataType::Float32>; -template class ClPermuteWorkload<DataType::QuantisedAsymm8>; - -} // namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClPermuteWorkload.hpp b/src/armnn/backends/ClWorkloads/ClPermuteWorkload.hpp deleted file mode 100644 index a1f3161921..0000000000 --- a/src/armnn/backends/ClWorkloads/ClPermuteWorkload.hpp +++ /dev/null @@ -1,42 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -#include <armnn/TypesUtils.hpp> -#include <arm_compute/runtime/CL/functions/CLPermute.h> - -#include <string> - -namespace armnn -{ - -arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descriptor); - -template<armnn::DataType... DataTypes> -class ClPermuteWorkload : public TypedWorkload<PermuteQueueDescriptor, DataTypes...> -{ -public: - static const std::string& GetName() - { - static const std::string name = std::string("ClPermuteWorkload"); - return name; - } - - ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - using TypedWorkload<PermuteQueueDescriptor, DataTypes...>::m_Data; - mutable arm_compute::CLPermute m_PermuteFunction; -}; - -using ClPermuteFloatWorkload = ClPermuteWorkload<DataType::Float16, DataType::Float32>; -using ClPermuteUint8Workload = ClPermuteWorkload<DataType::QuantisedAsymm8>; - -} // namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp b/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp deleted file mode 100644 index a1ee50b39f..0000000000 --- a/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClPooling2dBaseWorkload.hpp" -#include "backends/ClLayerSupport.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/ArmComputeUtils.hpp" -#include "backends/ArmComputeTensorUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const Pooling2dDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor); - - return arm_compute::CLPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo); -} - -template <armnn::DataType... dataTypes> -ClPooling2dBaseWorkload<dataTypes...>::ClPooling2dBaseWorkload( - const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, const std::string& name) - : TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>(descriptor, info) -{ - m_Data.ValidateInputsOutputs(name, 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters); - - // Run the layer. - m_PoolingLayer.configure(&input, &output, layerInfo); -} - -template class ClPooling2dBaseWorkload<DataType::Float16, DataType::Float32>; -template class ClPooling2dBaseWorkload<DataType::QuantisedAsymm8>; - -} diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp b/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp deleted file mode 100644 index ea7ddfb41b..0000000000 --- a/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ - -arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const Pooling2dDescriptor& descriptor); - -// Base class template providing an implementation of the Pooling2d layer common to all data types. -template <armnn::DataType... dataTypes> -class ClPooling2dBaseWorkload : public TypedWorkload<Pooling2dQueueDescriptor, dataTypes...> -{ -public: - using TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>::m_Data; - - ClPooling2dBaseWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, - const std::string& name); - -protected: - mutable arm_compute::CLPoolingLayer m_PoolingLayer; -}; - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dFloatWorkload.cpp b/src/armnn/backends/ClWorkloads/ClPooling2dFloatWorkload.cpp deleted file mode 100644 index dc9d17f0ae..0000000000 --- a/src/armnn/backends/ClWorkloads/ClPooling2dFloatWorkload.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClPooling2dFloatWorkload.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -ClPooling2dFloatWorkload::ClPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info) - : ClPooling2dBaseWorkload<DataType::Float16, DataType::Float32>(descriptor, info, "ClPooling2dFloatWorkload") -{ -} - -void ClPooling2dFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClPooling2dFloatWorkload_Execute"); - m_PoolingLayer.run(); -} - -} //namespace armnn - diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dFloatWorkload.hpp b/src/armnn/backends/ClWorkloads/ClPooling2dFloatWorkload.hpp deleted file mode 100644 index 71648d40f4..0000000000 --- a/src/armnn/backends/ClWorkloads/ClPooling2dFloatWorkload.hpp +++ /dev/null @@ -1,22 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include "ClPooling2dBaseWorkload.hpp" - -namespace armnn -{ -class ClPooling2dFloatWorkload : public ClPooling2dBaseWorkload<DataType::Float16, DataType::Float32> -{ -public: - ClPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -}; - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.cpp deleted file mode 100644 index 0b4b15f806..0000000000 --- a/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClPooling2dUint8Workload.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -ClPooling2dUint8Workload::ClPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info) - : ClPooling2dBaseWorkload<DataType::QuantisedAsymm8>(descriptor, info, "ClPooling2dUint8Workload") -{ -} - -void ClPooling2dUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClPooling2dUint8Workload_Execute"); - m_PoolingLayer.run(); -} - -} //namespace armnn - - diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.hpp deleted file mode 100644 index 2baf2aa708..0000000000 --- a/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.hpp +++ /dev/null @@ -1,25 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include "ClPooling2dBaseWorkload.hpp" - -namespace armnn -{ - -class ClPooling2dUint8Workload : public ClPooling2dBaseWorkload<DataType::QuantisedAsymm8> -{ -public: - ClPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -}; - -} //namespace armnn - - diff --git a/src/armnn/backends/ClWorkloads/ClReshapeFloatWorkload.cpp b/src/armnn/backends/ClWorkloads/ClReshapeFloatWorkload.cpp deleted file mode 100644 index ea50436a66..0000000000 --- a/src/armnn/backends/ClWorkloads/ClReshapeFloatWorkload.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClReshapeFloatWorkload.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -ClReshapeFloatWorkload::ClReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) - : FloatWorkload<ReshapeQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClReshapeFloatWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_Layer.configure(&input, &output); -} - -void ClReshapeFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClReshapeFloatWorkload_Execute"); - m_Layer.run(); -} - -} //namespace armnn - diff --git a/src/armnn/backends/ClWorkloads/ClReshapeFloatWorkload.hpp b/src/armnn/backends/ClWorkloads/ClReshapeFloatWorkload.hpp deleted file mode 100644 index 48265143e5..0000000000 --- a/src/armnn/backends/ClWorkloads/ClReshapeFloatWorkload.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ - -class ClReshapeFloatWorkload : public FloatWorkload<ReshapeQueueDescriptor> -{ -public: - ClReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - mutable arm_compute::CLReshapeLayer m_Layer; -}; - -} //namespace armnn - - diff --git a/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.cpp deleted file mode 100644 index 82bd93ef9c..0000000000 --- a/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.cpp +++ /dev/null @@ -1,31 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClReshapeUint8Workload.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -ClReshapeUint8Workload::ClReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) - : Uint8Workload<ReshapeQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClReshapeUint8Workload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_Layer.configure(&input, &output); -} - -void ClReshapeUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClReshapeUint8Workload_Execute"); - - m_Layer.run(); -} - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.hpp deleted file mode 100644 index c9801a3ae1..0000000000 --- a/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ - -// Reshape -class ClReshapeUint8Workload : public Uint8Workload<ReshapeQueueDescriptor> -{ -public: - ClReshapeUint8Workload( const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - mutable arm_compute::CLReshapeLayer m_Layer; -}; - -} //namespace armnn - - diff --git a/src/armnn/backends/ClWorkloads/ClResizeBilinearFloatWorkload.cpp b/src/armnn/backends/ClWorkloads/ClResizeBilinearFloatWorkload.cpp deleted file mode 100644 index 8348afb76a..0000000000 --- a/src/armnn/backends/ClWorkloads/ClResizeBilinearFloatWorkload.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClResizeBilinearFloatWorkload.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/ClLayerSupport.hpp" -#include "backends/ArmComputeUtils.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -ClResizeBilinearFloatWorkload::ClResizeBilinearFloatWorkload(const ResizeBilinearQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload<ResizeBilinearQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClResizeBilinearFloatWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_ResizeBilinearLayer.configure(&input, &output, arm_compute::InterpolationPolicy::BILINEAR, - arm_compute::BorderMode::REPLICATE, arm_compute::PixelValue(0.f), - arm_compute::SamplingPolicy::TOP_LEFT); -}; - -void ClResizeBilinearFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClResizeBilinearFloatWorkload_Execute"); - m_ResizeBilinearLayer.run(); -} - - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClResizeBilinearFloatWorkload.hpp b/src/armnn/backends/ClWorkloads/ClResizeBilinearFloatWorkload.hpp deleted file mode 100644 index f2ee67f5dd..0000000000 --- a/src/armnn/backends/ClWorkloads/ClResizeBilinearFloatWorkload.hpp +++ /dev/null @@ -1,25 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ - -class ClResizeBilinearFloatWorkload : public FloatWorkload<ResizeBilinearQueueDescriptor> -{ -public: - ClResizeBilinearFloatWorkload(const ResizeBilinearQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLScale m_ResizeBilinearLayer; -}; - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp b/src/armnn/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp deleted file mode 100644 index b4ea236d49..0000000000 --- a/src/armnn/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClSoftmaxBaseWorkload.hpp" - -#include "backends/ArmComputeTensorUtils.hpp" - -#include <arm_compute/runtime/CL/functions/CLSoftmaxLayer.h> - -namespace armnn -{ - -arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input, - const TensorInfo& output) -{ - // NOTE: We report 4D Softmax as unsupported until full support is added to ACL - if(input.GetShape().GetNumDimensions() >= 4u) - { - return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "4d softmax is not supported"); - } - - const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - return arm_compute::CLSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo); -} - -} diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp b/src/armnn/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp deleted file mode 100644 index b800056cdf..0000000000 --- a/src/armnn/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp +++ /dev/null @@ -1,17 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <armnn/Tensor.hpp> -#include <arm_compute/core/Error.h> - -namespace armnn -{ - -arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input, - const TensorInfo& output); - -} // namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxFloatWorkload.cpp b/src/armnn/backends/ClWorkloads/ClSoftmaxFloatWorkload.cpp deleted file mode 100644 index c34b5a2a74..0000000000 --- a/src/armnn/backends/ClWorkloads/ClSoftmaxFloatWorkload.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClSoftmaxFloatWorkload.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -ClSoftmaxFloatWorkload::ClSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) - : FloatWorkload<SoftmaxQueueDescriptor>(descriptor, info) - , m_SoftmaxLayer(memoryManager) -{ - m_Data.ValidateInputsOutputs("ClSoftmaxFloatWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta); -} - -void ClSoftmaxFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClSoftmaxFloatWorkload_Execute"); - m_SoftmaxLayer.run(); -} - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxFloatWorkload.hpp b/src/armnn/backends/ClWorkloads/ClSoftmaxFloatWorkload.hpp deleted file mode 100644 index 965b845cf8..0000000000 --- a/src/armnn/backends/ClWorkloads/ClSoftmaxFloatWorkload.hpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> -#include "arm_compute/runtime/MemoryManagerOnDemand.h" - -#include <memory> - -namespace armnn -{ - -class ClSoftmaxFloatWorkload : public FloatWorkload<SoftmaxQueueDescriptor> -{ -public: - ClSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); - void Execute() const override; - -private: - mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer; -}; - -} //namespace armnn - diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp deleted file mode 100644 index 1bb9628d74..0000000000 --- a/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClSoftmaxUint8Workload.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -ClSoftmaxUint8Workload::ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) - : Uint8Workload<SoftmaxQueueDescriptor>(descriptor, info) - , m_SoftmaxLayer(memoryManager) -{ - m_Data.ValidateInputsOutputs("ClSoftmaxUint8Workload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - const auto outputQuantization = output.info()->quantization_info(); - - if ((outputQuantization.scale != (1.0f / 256.0f)) || (outputQuantization.offset != 0)) - { - throw InvalidArgumentException( - "Invalid quantization for output. Only scale = 1.0f / 256.0f and offset = 0 supported"); - } - - m_SoftmaxLayer.configure(&input, &output, descriptor.m_Parameters.m_Beta); -} - -void ClSoftmaxUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClSoftmaxUint8Workload_Execute"); - - m_SoftmaxLayer.run(); -} - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp deleted file mode 100644 index 29427a5976..0000000000 --- a/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp +++ /dev/null @@ -1,31 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> -#include "arm_compute/runtime/MemoryManagerOnDemand.h" - -#include <memory> - -namespace armnn -{ -// Softmax -class ClSoftmaxUint8Workload : public Uint8Workload<SoftmaxQueueDescriptor> -{ -public: - ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); - - void Execute() const override; -private: - - mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer; -}; - -} //namespace armnn - diff --git a/src/armnn/backends/ClWorkloads/ClSplitterFloatWorkload.cpp b/src/armnn/backends/ClWorkloads/ClSplitterFloatWorkload.cpp deleted file mode 100644 index 5fd634bdb6..0000000000 --- a/src/armnn/backends/ClWorkloads/ClSplitterFloatWorkload.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. 
All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClSplitterFloatWorkload.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -void ClSplitterFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClSplitterFloatWorkload_Execute"); - ClBaseSplitterWorkload::Execute(); -} - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClSplitterFloatWorkload.hpp b/src/armnn/backends/ClWorkloads/ClSplitterFloatWorkload.hpp deleted file mode 100644 index a0b5846f8e..0000000000 --- a/src/armnn/backends/ClWorkloads/ClSplitterFloatWorkload.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "ClBaseSplitterWorkload.hpp" - -namespace armnn -{ - -class ClSplitterFloatWorkload : public ClBaseSplitterWorkload<DataType::Float16, DataType::Float32> -{ -public: - using ClBaseSplitterWorkload<DataType::Float16, DataType::Float32>::ClBaseSplitterWorkload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.cpp deleted file mode 100644 index 50a251ada7..0000000000 --- a/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClSplitterUint8Workload.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -void ClSplitterUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClSplitterUint8Workload_Execute"); - ClBaseSplitterWorkload::Execute(); -} - -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.hpp deleted file mode 100644 index 19e8be5034..0000000000 --- a/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "ClBaseSplitterWorkload.hpp" - -namespace armnn -{ -class ClSplitterUint8Workload : public ClBaseSplitterWorkload<DataType::QuantisedAsymm8> -{ -public: - using ClBaseSplitterWorkload<DataType::QuantisedAsymm8>::ClBaseSplitterWorkload; - virtual void Execute() const override; -}; -} //namespace armnn - - - diff --git a/src/armnn/backends/ClWorkloads/ClSubtractionWorkload.cpp b/src/armnn/backends/ClWorkloads/ClSubtractionWorkload.cpp deleted file mode 100644 index 1c70130fa4..0000000000 --- a/src/armnn/backends/ClWorkloads/ClSubtractionWorkload.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClSubtractionWorkload.hpp" - -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/ArmComputeTensorUtils.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; - -template <armnn::DataType... 
T> -ClSubtractionWorkload<T...>::ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor, - const WorkloadInfo& info) - : TypedWorkload<SubtractionQueueDescriptor, T...>(descriptor, info) -{ - this->m_Data.ValidateInputsOutputs("ClSubtractionWorkload", 2, 1); - - arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[1])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor(); - m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy); -} - -template <armnn::DataType... T> -void ClSubtractionWorkload<T...>::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClSubtractionWorkload_Execute"); - m_Layer.run(); -} - -bool ClSubtractionValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0); - const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - const arm_compute::Status aclStatus = arm_compute::CLArithmeticSubtraction::validate(&aclInput0Info, - &aclInput1Info, - &aclOutputInfo, - g_AclConvertPolicy); - - const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); - if (!supported && reasonIfUnsupported) - { - *reasonIfUnsupported = aclStatus.error_description(); - } - - return supported; -} - -} //namespace armnn - -template class armnn::ClSubtractionWorkload<armnn::DataType::Float16, armnn::DataType::Float32>; -template class armnn::ClSubtractionWorkload<armnn::DataType::QuantisedAsymm8>; diff --git a/src/armnn/backends/ClWorkloads/ClSubtractionWorkload.hpp b/src/armnn/backends/ClWorkloads/ClSubtractionWorkload.hpp deleted file mode 100644 index 59a5f01e73..0000000000 --- a/src/armnn/backends/ClWorkloads/ClSubtractionWorkload.hpp +++ /dev/null @@ -1,31 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ - -template <armnn::DataType... dataTypes> -class ClSubtractionWorkload : public TypedWorkload<SubtractionQueueDescriptor, dataTypes...> -{ -public: - ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - mutable arm_compute::CLArithmeticSubtraction m_Layer; -}; - -bool ClSubtractionValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported); -} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClWorkloadUtils.hpp b/src/armnn/backends/ClWorkloads/ClWorkloadUtils.hpp deleted file mode 100644 index 6f1b155745..0000000000 --- a/src/armnn/backends/ClWorkloads/ClWorkloadUtils.hpp +++ /dev/null @@ -1,62 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#pragma once - -#include "OpenClTimer.hpp" -#include "backends/ArmComputeTensorUtils.hpp" -#include "backends/CpuTensorHandle.hpp" - -#include <Half.hpp> - -#define ARMNN_SCOPED_PROFILING_EVENT_CL(name) \ - ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::GpuAcc, \ - name, \ - armnn::OpenClTimer(), \ - armnn::WallClockTimer()) - -namespace armnn -{ - -template <typename T> -void CopyArmComputeClTensorData(const T* srcData, arm_compute::CLTensor& dstTensor) -{ - { - ARMNN_SCOPED_PROFILING_EVENT_CL("MapClTensorForWriting"); - dstTensor.map(true); - } - - { - ARMNN_SCOPED_PROFILING_EVENT_CL("CopyToClTensor"); - armcomputetensorutils::CopyArmComputeITensorData<T>(srcData, dstTensor); - } - - dstTensor.unmap(); -} - -template <typename T> -void InitialiseArmComputeClTensorData(arm_compute::CLTensor& clTensor, const T* data) -{ - armcomputetensorutils::InitialiseArmComputeTensorEmpty(clTensor); - CopyArmComputeClTensorData<T>(data, clTensor); -} - -inline void InitializeArmComputeClTensorDataForFloatTypes(arm_compute::CLTensor& clTensor, - const ConstCpuTensorHandle *handle) -{ - BOOST_ASSERT(handle); - switch(handle->GetTensorInfo().GetDataType()) - { - case DataType::Float16: - InitialiseArmComputeClTensorData(clTensor, handle->GetConstTensor<armnn::Half>()); - break; - case DataType::Float32: - InitialiseArmComputeClTensorData(clTensor, handle->GetConstTensor<float>()); - break; - default: - BOOST_ASSERT_MSG(false, "Unexpected floating point type."); - } -}; - -} //namespace armnn diff --git a/src/armnn/backends/CpuTensorHandle.cpp b/src/armnn/backends/CpuTensorHandle.cpp deleted file mode 100644 index 1a264531e5..0000000000 --- a/src/armnn/backends/CpuTensorHandle.cpp +++ /dev/null @@ -1,113 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#include "armnn/Exceptions.hpp" -#include "CpuTensorHandle.hpp" - -#include <cstring> - -namespace armnn -{ - -ConstCpuTensorHandle::ConstCpuTensorHandle(const TensorInfo& tensorInfo) -: m_TensorInfo(tensorInfo) -, m_Memory(nullptr) -{ -} - -template <> -const void* ConstCpuTensorHandle::GetConstTensor() const -{ - return m_Memory; -} - -CpuTensorHandle::CpuTensorHandle(const TensorInfo& tensorInfo) -: ConstCpuTensorHandle(tensorInfo) -, m_MutableMemory(nullptr) -{ -} - -template <> -void* CpuTensorHandle::GetTensor() const -{ - return m_MutableMemory; -} - -ScopedCpuTensorHandle::ScopedCpuTensorHandle(const TensorInfo& tensorInfo) -: CpuTensorHandle(tensorInfo) -{ -} - -ScopedCpuTensorHandle::ScopedCpuTensorHandle(const ConstTensor& tensor) -: ScopedCpuTensorHandle(tensor.GetInfo()) -{ - CopyFrom(tensor.GetMemoryArea(), tensor.GetNumBytes()); -} - -ScopedCpuTensorHandle::ScopedCpuTensorHandle(const ConstCpuTensorHandle& tensorHandle) -: ScopedCpuTensorHandle(tensorHandle.GetTensorInfo()) -{ - CopyFrom(tensorHandle.GetConstTensor<void>(), tensorHandle.GetTensorInfo().GetNumBytes()); -} - -ScopedCpuTensorHandle::ScopedCpuTensorHandle(const ScopedCpuTensorHandle& other) -: CpuTensorHandle(other.GetTensorInfo()) -{ - CopyFrom(other); -} - -ScopedCpuTensorHandle& ScopedCpuTensorHandle::operator=(const ScopedCpuTensorHandle& other) -{ - ::operator delete(GetTensor<void>()); - SetMemory(nullptr); - CopyFrom(other); - return *this; -} - -ScopedCpuTensorHandle::~ScopedCpuTensorHandle() -{ - ::operator delete(GetTensor<void>()); -} - -void ScopedCpuTensorHandle::Allocate() -{ - if (GetTensor<void>() == nullptr) - { - SetMemory(::operator new(GetTensorInfo().GetNumBytes())); - } - else - { - throw InvalidArgumentException("CpuTensorHandle::Allocate Trying to allocate a CpuTensorHandle " - "that already has allocated memory."); - } -} - -void ScopedCpuTensorHandle::CopyFrom(const ScopedCpuTensorHandle& other) -{ - CopyFrom(other.GetTensor<void>(), other.GetTensorInfo().GetNumBytes()); -} - -void ScopedCpuTensorHandle::CopyFrom(const void* srcMemory, unsigned int numBytes) -{ - BOOST_ASSERT(GetTensor<void>() == nullptr); - BOOST_ASSERT(GetTensorInfo().GetNumBytes() == numBytes); - - if (srcMemory) - { - Allocate(); - memcpy(GetTensor<void>(), srcMemory, numBytes); - } -} - -void PassthroughCpuTensorHandle::Allocate() -{ - throw InvalidArgumentException("PassthroughCpuTensorHandle::Allocate() should never be called"); -} - -void ConstPassthroughCpuTensorHandle::Allocate() -{ - throw InvalidArgumentException("ConstPassthroughCpuTensorHandle::Allocate() should never be called"); -} - -} // namespace armnn diff --git a/src/armnn/backends/CpuTensorHandle.hpp b/src/armnn/backends/CpuTensorHandle.hpp deleted file mode 100644 index 541beefde6..0000000000 --- a/src/armnn/backends/CpuTensorHandle.hpp +++ /dev/null @@ -1,171 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once -#include "CpuTensorHandleFwd.hpp" - -#include "armnn/TypesUtils.hpp" - -#include "OutputHandler.hpp" - -#include <algorithm> - -namespace armnn -{ - -// Abstract tensor handle wrapping a CPU-readable region of memory, interpreting it as tensor data.
-class ConstCpuTensorHandle : public ITensorHandle -{ -public: - template <typename T> - const T* GetConstTensor() const - { - BOOST_ASSERT(GetTensorInfo().GetDataType() == GetDataType<T>()); - return reinterpret_cast<const T*>(m_Memory); - } - - const TensorInfo& GetTensorInfo() const - { - return m_TensorInfo; - } - - virtual ITensorHandle::Type GetType() const override - { - return ITensorHandle::Cpu; - } - - virtual void Manage() override {} - - virtual ITensorHandle* GetParent() const override { return nullptr; } - - virtual const void* Map(bool /* blocking = true */) const override { return m_Memory; } - virtual void Unmap() const override {} - - TensorShape GetStrides() const override - { - TensorShape shape(m_TensorInfo.GetShape()); - auto size = GetDataTypeSize(m_TensorInfo.GetDataType()); - auto runningSize = size; - std::vector<unsigned int> strides(shape.GetNumDimensions()); - auto lastIdx = shape.GetNumDimensions()-1; - for (unsigned int i=0; i < lastIdx ; i++) - { - strides[lastIdx-i] = runningSize; - runningSize *= shape[lastIdx-i]; - } - strides[0] = runningSize; - return TensorShape(shape.GetNumDimensions(), strides.data()); - } - TensorShape GetShape() const override { return m_TensorInfo.GetShape(); } - -protected: - ConstCpuTensorHandle(const TensorInfo& tensorInfo); - - void SetConstMemory(const void* mem) { m_Memory = mem; } - -private: - ConstCpuTensorHandle(const ConstCpuTensorHandle& other) = delete; - ConstCpuTensorHandle& operator=(const ConstCpuTensorHandle& other) = delete; - - TensorInfo m_TensorInfo; - const void* m_Memory; -}; - -// Abstract specialization of ConstCpuTensorHandle that allows write access to the same data. -class CpuTensorHandle : public ConstCpuTensorHandle -{ -public: - template <typename T> - T* GetTensor() const - { - BOOST_ASSERT(GetTensorInfo().GetDataType() == GetDataType<T>()); - return reinterpret_cast<T*>(m_MutableMemory); - } - -protected: - CpuTensorHandle(const TensorInfo& tensorInfo); - - void SetMemory(void* mem) - { - m_MutableMemory = mem; - SetConstMemory(m_MutableMemory); - } - -private: - - CpuTensorHandle(const CpuTensorHandle& other) = delete; - CpuTensorHandle& operator=(const CpuTensorHandle& other) = delete; - void* m_MutableMemory; -}; - -// A CpuTensorHandle that owns the wrapped memory region. -class ScopedCpuTensorHandle : public CpuTensorHandle -{ -public: - explicit ScopedCpuTensorHandle(const TensorInfo& tensorInfo); - - // Copies contents from Tensor. - explicit ScopedCpuTensorHandle(const ConstTensor& tensor); - - // Copies contents from ConstCpuTensorHandle - explicit ScopedCpuTensorHandle(const ConstCpuTensorHandle& tensorHandle); - - ScopedCpuTensorHandle(const ScopedCpuTensorHandle& other); - ScopedCpuTensorHandle& operator=(const ScopedCpuTensorHandle& other); - ~ScopedCpuTensorHandle(); - - virtual void Allocate() override; - -private: - void CopyFrom(const ScopedCpuTensorHandle& other); - void CopyFrom(const void* srcMemory, unsigned int numBytes); -}; - -// A CpuTensorHandle that wraps an already allocated memory region. -// -// Clients must make sure the passed in memory region stays alive for the lifetime of -// the PassthroughCpuTensorHandle instance. -// -// Note there is no polymorphism to/from ConstPassthroughCpuTensorHandle. 
-class PassthroughCpuTensorHandle : public CpuTensorHandle -{ -public: - PassthroughCpuTensorHandle(const TensorInfo& tensorInfo, void* mem) - : CpuTensorHandle(tensorInfo) - { - SetMemory(mem); - } - - virtual void Allocate() override; -}; - -// A ConstCpuTensorHandle that wraps an already allocated memory region. -// -// This allows users to pass in const memory to a network. -// Clients must make sure the passed in memory region stays alive for the lifetime of -// the PassthroughCpuTensorHandle instance. -// -// Note there is no polymorphism to/from PassthroughCpuTensorHandle. -class ConstPassthroughCpuTensorHandle : public ConstCpuTensorHandle -{ -public: - ConstPassthroughCpuTensorHandle(const TensorInfo& tensorInfo, const void* mem) - : ConstCpuTensorHandle(tensorInfo) - { - SetConstMemory(mem); - } - - virtual void Allocate() override; -}; - - -// Template specializations. - -template <> -const void* ConstCpuTensorHandle::GetConstTensor() const; - -template <> -void* CpuTensorHandle::GetTensor() const; - -} // namespace armnn diff --git a/src/armnn/backends/CpuTensorHandleFwd.hpp b/src/armnn/backends/CpuTensorHandleFwd.hpp deleted file mode 100644 index d439d0bbe6..0000000000 --- a/src/armnn/backends/CpuTensorHandleFwd.hpp +++ /dev/null @@ -1,16 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -namespace armnn -{ - -class ConstCpuTensorHandle; -class CpuTensorHandle; -class ScopedCpuTensorHandle; -class PassthroughCpuTensorHandle; -class ConstPassthroughCpuTensorHandle; - -} // namespace armnn diff --git a/src/armnn/backends/ITensorHandle.hpp b/src/armnn/backends/ITensorHandle.hpp deleted file mode 100644 index 02f4ed6e5a..0000000000 --- a/src/armnn/backends/ITensorHandle.hpp +++ /dev/null @@ -1,73 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -namespace armnn -{ - -class TensorShape; - -class ITensorHandle -{ -public: - enum Type - { - Cpu, - CL, - Neon - }; - - virtual ~ITensorHandle(){} - - /// Indicate to the memory manager that this resource is active. - /// This is used to compute overlapping lifetimes of resources. - virtual void Manage() = 0; - - /// Indicate to the memory manager that this resource is no longer active. - /// This is used to compute overlapping lifetimes of resources. - virtual void Allocate() = 0; - - /// Get the type backend associated with the tensor handle. - /// \return Type enum - virtual ITensorHandle::Type GetType() const = 0; - - /// Get the parent tensor if this is a subtensor. - /// \return a pointer to the parent tensor. Otherwise nullptr if not a subtensor. - virtual ITensorHandle* GetParent() const = 0; - - /// Map the tensor data for access. - /// \param blocking hint to block the calling thread until all other accesses are complete. (backend dependent) - /// \return pointer to the first element of the mapped data. - virtual const void* Map(bool blocking=true) const = 0; - - /// Unmap the tensor data - virtual void Unmap() const = 0; - - /// Map the tensor data for access. Must be paired with call to Unmap(). - /// \param blocking hint to block the calling thread until all other accesses are complete. (backend dependent) - /// \return pointer to the first element of the mapped data. - void* Map(bool blocking=true) - { - return const_cast<void*>(static_cast<const ITensorHandle*>(this)->Map(blocking)); - } - - /// Unmap the tensor data that was previously mapped with call to Map(). 
- void Unmap() - { - return static_cast<const ITensorHandle*>(this)->Unmap(); - } - - /// Get the strides for each dimension ordered from largest to smallest where - /// the smallest value is the same as the size of a single element in the tensor. - /// \return a TensorShape filled with the strides for each dimension - virtual TensorShape GetStrides() const = 0; - - /// Get the number of elements for each dimension ordered from slowest iterating dimension - /// to fastest iterating dimension. - /// \return a TensorShape filled with the number of elements for each dimension. - virtual TensorShape GetShape() const = 0; -}; - -} diff --git a/src/armnn/backends/MakeWorkloadHelper.hpp b/src/armnn/backends/MakeWorkloadHelper.hpp deleted file mode 100644 index 281a65a21e..0000000000 --- a/src/armnn/backends/MakeWorkloadHelper.hpp +++ /dev/null @@ -1,78 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -namespace armnn -{ -namespace -{ - -// Make a workload of the specified WorkloadType. -template<typename WorkloadType> -struct MakeWorkloadForType -{ - template<typename QueueDescriptorType, typename... Args> - static std::unique_ptr<WorkloadType> Func(const QueueDescriptorType& descriptor, - const WorkloadInfo& info, - Args&&... args) - { - return std::make_unique<WorkloadType>(descriptor, info, std::forward<Args>(args)...); - } -}; - -// Specialization for NullWorkload, used for unsupported workloads. -template<> -struct MakeWorkloadForType<NullWorkload> -{ - template<typename QueueDescriptorType, typename... Args> - static std::unique_ptr<NullWorkload> Func(const QueueDescriptorType& descriptor, - const WorkloadInfo& info, - Args&&... args) - { - return nullptr; - } -}; - -// Makes a workload for one of the specified types, based on the data type requirements of the TensorInfo. -// Specify NullWorkload as the WorkloadType for unsupported DataType/WorkloadType combos. -template <typename Float16Workload, typename Float32Workload, typename Uint8Workload, typename QueueDescriptorType, - typename... Args> -std::unique_ptr<IWorkload> MakeWorkload(const QueueDescriptorType& descriptor, const WorkloadInfo& info, Args&&... args) -{ - const DataType dataType = !info.m_InputTensorInfos.empty() ? - info.m_InputTensorInfos[0].GetDataType() - : info.m_OutputTensorInfos[0].GetDataType(); - - BOOST_ASSERT(info.m_InputTensorInfos.empty() || info.m_OutputTensorInfos.empty() - || info.m_InputTensorInfos[0].GetDataType() == info.m_OutputTensorInfos[0].GetDataType()); - - switch (dataType) - { - case DataType::Float16: - return MakeWorkloadForType<Float16Workload>::Func(descriptor, info, std::forward<Args>(args)...); - case DataType::Float32: - return MakeWorkloadForType<Float32Workload>::Func(descriptor, info, std::forward<Args>(args)...); - case DataType::QuantisedAsymm8: - return MakeWorkloadForType<Uint8Workload>::Func(descriptor, info, std::forward<Args>(args)...); - default: - BOOST_ASSERT_MSG(false, "Unknown DataType."); - return nullptr; - } -} - -// Makes a workload for one of the specified types, based on the data type requirements of the TensorInfo. -// Calling this method is equivalent to calling the three-typed MakeWorkload overload with <FloatWorkload, -// FloatWorkload, Uint8Workload>. -// Specify NullWorkload as the WorkloadType for unsupported DataType/WorkloadType combos. -template <typename FloatWorkload, typename Uint8Workload, typename QueueDescriptorType, typename...
Args> -std::unique_ptr<IWorkload> MakeWorkload(const QueueDescriptorType& descriptor, const WorkloadInfo& info, Args&&... args) -{ - return MakeWorkload<FloatWorkload, FloatWorkload, Uint8Workload>(descriptor, info, - std::forward<Args>(args)...); -} - - -} //namespace -} //namespace armnn diff --git a/src/armnn/backends/MemCopyWorkload.cpp b/src/armnn/backends/MemCopyWorkload.cpp deleted file mode 100644 index 75271a09de..0000000000 --- a/src/armnn/backends/MemCopyWorkload.cpp +++ /dev/null @@ -1,61 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#include "MemCopyWorkload.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "TypeUtils.hpp" - -#include <cstring> -#include <boost/cast.hpp> - -namespace armnn -{ - -namespace -{ - -template <typename SrcTensorHandleType, typename DstTensorHandleType> -void GatherTensorHandlePairs(const MemCopyQueueDescriptor& descriptor, - std::vector<std::pair<SrcTensorHandleType*, DstTensorHandleType*>>& tensorHandlePairs) -{ - const unsigned int numInputs = static_cast<unsigned int>(descriptor.m_Inputs.size()); - tensorHandlePairs.reserve(numInputs); - - for (unsigned int i = 0; i < numInputs; ++i) - { - SrcTensorHandleType* const srcTensorHandle = boost::polymorphic_downcast<SrcTensorHandleType*>( - descriptor.m_Inputs[i]); - DstTensorHandleType* const dstTensorHandle = boost::polymorphic_downcast<DstTensorHandleType*>( - descriptor.m_Outputs[i]); - - tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle); - } -} - -} //namespace - - -CopyMemGenericWorkload::CopyMemGenericWorkload(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) - : BaseWorkload<MemCopyQueueDescriptor>(descriptor, info) -{ - GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); -} - -void CopyMemGenericWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "CopyMemGeneric_Execute"); - - auto copyFunc = [](void* dst, const void* src, size_t size) - { - memcpy(dst, src, size); - }; - - for (const auto& pair : m_TensorHandlePairs) - { - CopyTensorContentsGeneric(pair.first, pair.second, copyFunc); - } -} - -} //namespace armnn diff --git a/src/armnn/backends/MemCopyWorkload.hpp b/src/armnn/backends/MemCopyWorkload.hpp deleted file mode 100644 index 5227f32c9f..0000000000 --- a/src/armnn/backends/MemCopyWorkload.hpp +++ /dev/null @@ -1,26 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include "CpuTensorHandleFwd.hpp" -#include "backends/Workload.hpp" -#include "WorkloadUtils.hpp" -#include <utility> - -namespace armnn -{ - -class CopyMemGenericWorkload : public BaseWorkload<MemCopyQueueDescriptor> -{ -public: - CopyMemGenericWorkload(const MemCopyQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - using TensorHandlePair = std::pair<const ITensorHandle*, ITensorHandle*>; - std::vector<TensorHandlePair> m_TensorHandlePairs; -}; - -} //namespace armnn diff --git a/src/armnn/backends/NeonLayerSupport.cpp b/src/armnn/backends/NeonLayerSupport.cpp deleted file mode 100644 index 30956dfba0..0000000000 --- a/src/armnn/backends/NeonLayerSupport.cpp +++ /dev/null @@ -1,468 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "NeonLayerSupport.hpp" - -#include "LayerSupportCommon.hpp" -#include "InternalTypes.hpp" - -#include <armnn/Descriptors.hpp> -#include <armnn/Types.hpp> -#include <armnn/Tensor.hpp> - -#include <boost/core/ignore_unused.hpp> - -#ifdef ARMCOMPUTENEON_ENABLED -#include "NeonWorkloads/NeonAdditionFloatWorkload.hpp" -#include "NeonWorkloads/NeonActivationFloatWorkload.hpp" -#include "NeonWorkloads/NeonBatchNormalizationFloatWorkload.hpp" -#include "NeonWorkloads/NeonConvolution2dBaseWorkload.hpp" -#include "NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp" -#include "NeonWorkloads/NeonL2NormalizationFloatWorkload.hpp" -#include "NeonWorkloads/NeonMultiplicationFloatWorkload.hpp" -#include "NeonWorkloads/NeonNormalizationFloatWorkload.hpp" -#include "NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp" -#include "NeonWorkloads/NeonPermuteWorkload.hpp" -#include "NeonWorkloads/NeonPooling2dBaseWorkload.hpp" -#include "NeonWorkloads/NeonSoftmaxBaseWorkload.hpp" -#include "NeonWorkloads/NeonSubtractionFloatWorkload.hpp" -#endif - -using namespace boost; - -namespace armnn -{ - -bool IsNeonDirectConvolutionPreferred(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc) -{ - // See arm_compute::NEDirectConvolutionLayer documentation for the supported cases, - // and complement with NEDirectConvolutionLayerKernel::configure() implementation. - - // Only 1x1 is using direct convolution. Performance results and details are in: - // https://jira.arm.com/browse/IVGCVSW-1003 - // Measurements were taken as of clframework: f105ab972135bcd21304883eff040d7e587099bc - - const bool dataTypeSupported = (weightInfo.GetDataType() == armnn::DataType::Float32); - - // Strides: 1|2|3 - const bool strideSupported = (desc.m_StrideX == 1 || desc.m_StrideX == 2 || desc.m_StrideX == 3) && - (desc.m_StrideY == 1 || desc.m_StrideY == 2 || desc.m_StrideY == 3); - - auto paddingLargerThan = [](const Convolution2dDescriptor& conv2ddesc, unsigned int value) - { - return conv2ddesc.m_PadLeft > value || conv2ddesc.m_PadRight > value || - conv2ddesc.m_PadTop > value || conv2ddesc.m_PadBottom > value; - }; - - // Supported sizes and padding. - const bool sizeAndPaddingSupported = - // Pad > 0 not supported for 1x1 weights. - (weightInfo.GetShape()[2] == 1 && weightInfo.GetShape()[3] == 1 && !paddingLargerThan(desc, 0u)); - - const bool preferDirectConvolution = dataTypeSupported && - strideSupported && - sizeAndPaddingSupported && - // NEDirectConvolutionLayerKernel doesn't support NULL bias. - desc.m_BiasEnabled; - return preferDirectConvolution; -} - -bool IsNeonNormalizationDescParamsSupported(std::string* reasonIfUnsupported, const NormalizationDescriptor& parameters) -{ - if (parameters.m_NormMethodType != NormalizationAlgorithmMethod::LocalBrightness) - { - if (reasonIfUnsupported) - { - *reasonIfUnsupported = "Unsupported normalisation method type, only LocalBrightness is supported"; - } - return false; - } - if (parameters.m_NormSize % 2 == 0) - { - if (reasonIfUnsupported) - { - *reasonIfUnsupported = "Normalization size must be an odd number."; - } - return false; - } - - return true; -} - -bool IsNeonBackendSupported(std::string* reasonIfUnsupported) -{ -#if ARMCOMPUTENEON_ENABLED - return true; -#else - if (reasonIfUnsupported != nullptr) - { - *reasonIfUnsupported = "The armnn library has been built without NEON support"; - } - return false; -#endif -} - -template<typename FloatFunc, typename Uint8Func, typename ... 
Params> -bool IsSupportedForDataTypeNeon(std::string* reasonIfUnsupported, - DataType dataType, - FloatFunc floatFuncPtr, - Uint8Func uint8FuncPtr, - Params&&... params) -{ - return IsNeonBackendSupported(reasonIfUnsupported) && - IsSupportedForDataTypeGeneric(reasonIfUnsupported, - dataType, - floatFuncPtr, - floatFuncPtr, - uint8FuncPtr, - std::forward<Params>(params)...); -} - -#if ARMCOMPUTENEON_ENABLED -template<class FuncType, class... Args> -inline bool IsWorkloadSupported(FuncType& func, std::string* reasonIfUnsupported, Args&&... args) -{ - arm_compute::Status aclStatus = func(std::forward<Args>(args)...); - const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); - if (!supported && reasonIfUnsupported) - { - *reasonIfUnsupported = aclStatus.error_description(); - } - return supported; -} - -#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \ - return IsWorkloadSupported(func, reasonIfUnsupported, __VA_ARGS__); -#else -#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \ - return IsNeonBackendSupported(reasonIfUnsupported); -#endif - -bool IsActivationSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - const ActivationDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(descriptor); - FORWARD_WORKLOAD_VALIDATE_FUNC(NeonActivationWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor); -} - -bool IsAdditionSupportedNeon(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(NeonAdditionWorkloadValidate, - reasonIfUnsupported, - input0, - input1, - output); -} - -bool IsBatchNormalizationSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& mean, - const TensorInfo& var, - const TensorInfo& beta, - const TensorInfo& gamma, - const BatchNormalizationDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(NeonBatchNormalizationValidate, - reasonIfUnsupported, - input, - output, - mean, - var, - beta, - gamma, - descriptor); -} - -bool IsConstantSupportedNeon(const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - return IsSupportedForDataTypeNeon(reasonIfUnsupported, - output.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsConvolution2dSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional<TensorInfo>& biases, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(NeonConvolution2dWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor, - weights, - biases); -} - -bool IsDepthwiseConvolutionSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional<TensorInfo>& biases, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDepthwiseConvolutionWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor, - weights, - biases); -} - -bool IsDivisionSupportedNeon(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - // At the moment division is not supported - return false; -} - -bool IsSubtractionSupportedNeon(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* 
reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSubtractionWorkloadValidate, - reasonIfUnsupported, - input0, - input1, - output); -} - -bool IsFullyConnectedSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& weights, - const TensorInfo& biases, - const FullyConnectedDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - // At the moment U8 is unsupported - if (input.GetDataType() == DataType::QuantisedAsymm8) - { - return false; - } - FORWARD_WORKLOAD_VALIDATE_FUNC(NeonFullyConnectedWorkloadValidate, - reasonIfUnsupported, - input, - output, - weights, - biases, - descriptor); -} - -bool IsInputSupportedNeon(const TensorInfo& input, - std::string* reasonIfUnsupported) -{ - return IsSupportedForDataTypeNeon(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsL2NormalizationSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(NeonL2NormalizationWorkloadValidate, reasonIfUnsupported, input, output); -} - -bool IsMergerSupportedNeon(const std::vector<const TensorInfo*> inputs, - const OriginsDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(descriptor); - return IsSupportedForDataTypeNeon(reasonIfUnsupported, - inputs[0]->GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsMultiplicationSupportedNeon(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMultiplicationWorkloadValidate, - reasonIfUnsupported, - input0, - input1, - output); -} - -bool IsNormalizationSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - const NormalizationDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(NeonNormalizationWorkloadValidate, reasonIfUnsupported, input, output, descriptor); -} - -bool IsOutputSupportedNeon(const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - return IsSupportedForDataTypeNeon(reasonIfUnsupported, - output.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsPermuteSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - const PermuteDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPermuteWorkloadValidate, reasonIfUnsupported, input, output, descriptor); -} - -bool IsPooling2dSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - const Pooling2dDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPooling2dWorkloadValidate, reasonIfUnsupported, input, output, descriptor); -} - -bool IsResizeBilinearSupportedNeon(const TensorInfo& input, - std::string* reasonIfUnsupported) -{ - ignore_unused(input); - return false; -} - -bool IsSoftmaxSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - const SoftmaxDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSoftmaxWorkloadValidate, reasonIfUnsupported, input, output, descriptor); -} - -bool IsSplitterSupportedNeon(const TensorInfo& input, - const ViewsDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(descriptor); - return IsSupportedForDataTypeNeon(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsFakeQuantizationSupportedNeon(const TensorInfo& input, - const FakeQuantizationDescriptor& descriptor, - 
std::string* reasonIfUnsupported) -{ - ignore_unused(input); - ignore_unused(descriptor); - return false; -} - -bool IsReshapeSupportedNeon(const TensorInfo& input, - std::string* reasonIfUnsupported) -{ - return IsSupportedForDataTypeNeon(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsFloorSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - ignore_unused(output); - return IsNeonBackendSupported(reasonIfUnsupported) && - IsSupportedForDataTypeGeneric(reasonIfUnsupported, - input.GetDataType(), - &FalseFuncF16<>, - &TrueFunc<>, - &FalseFuncU8<>); -} - -bool IsLstmSupportedNeon(const TensorInfo& input, const TensorInfo& outputStateIn, - const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, - const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, - const TensorInfo& output, const LstmDescriptor& descriptor, - const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, - const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights, - const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, - const TensorInfo& forgetGateBias, const TensorInfo& cellBias, - const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, - const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, - const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, - const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, - const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported) -{ - ignore_unused(input); - ignore_unused(outputStateIn); - ignore_unused(cellStateIn); - ignore_unused(scratchBuffer); - ignore_unused(outputStateOut); - ignore_unused(cellStateOut); - ignore_unused(output); - ignore_unused(descriptor); - ignore_unused(inputToForgetWeights); - ignore_unused(inputToCellWeights); - ignore_unused(inputToOutputWeights); - ignore_unused(recurrentToForgetWeights); - ignore_unused(recurrentToCellWeights); - ignore_unused(recurrentToOutputWeights); - ignore_unused(forgetGateBias); - ignore_unused(cellBias); - ignore_unused(outputGateBias); - ignore_unused(inputToInputWeights); - ignore_unused(recurrentToInputWeights); - ignore_unused(cellToInputWeights); - ignore_unused(inputGateBias); - ignore_unused(projectionWeights); - ignore_unused(projectionBias); - ignore_unused(cellToForgetWeights); - ignore_unused(cellToOutputWeights); - return false; -} - -bool IsConvertFp16ToFp32SupportedNeon(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - ignore_unused(input); - ignore_unused(output); - return true; -} - -bool IsConvertFp32ToFp16SupportedNeon(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - ignore_unused(input); - ignore_unused(output); - return true; -} - -bool IsMeanSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - const MeanDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - return false; -} - -} diff --git a/src/armnn/backends/NeonLayerSupport.hpp b/src/armnn/backends/NeonLayerSupport.hpp deleted file mode 100644 index 95b14b3ba6..0000000000 --- a/src/armnn/backends/NeonLayerSupport.hpp +++ /dev/null @@ -1,163 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#pragma once - -#include <armnn/DescriptorsFwd.hpp> -#include <armnn/Types.hpp> -#include <armnn/Tensor.hpp> - -#include <boost/optional.hpp> - -namespace armnn -{ - -bool IsNeonDirectConvolutionPreferred(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc); - -bool IsNeonNormalizationDescParamsSupported(std::string* reasonIfUnsupported, - const NormalizationDescriptor& parameters); - -bool IsActivationSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - const ActivationDescriptor& descriptor, - std::string* reasonIfUnsupported); - -bool IsNeonDepthwiseConvolution2dDescParamsSupported(std::string* reasonIfUnsupported, - const DepthwiseConvolution2dDescriptor& parameters, - const TensorInfo& weights); - -bool IsAdditionSupportedNeon(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported); - -bool IsBatchNormalizationSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& mean, - const TensorInfo& var, - const TensorInfo& beta, - const TensorInfo& gamma, - const BatchNormalizationDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsConstantSupportedNeon(const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsConvolution2dSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional<TensorInfo>& biases, - std::string* reasonIfUnsupported = nullptr); - - -bool IsDepthwiseConvolutionSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional<TensorInfo>& biases, - std::string* reasonIfUnsupported = nullptr); - -bool IsDivisionSupportedNeon(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsSubtractionSupportedNeon(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsFullyConnectedSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& weights, - const TensorInfo& biases, - const FullyConnectedDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsInputSupportedNeon(const TensorInfo& input, - std::string* reasonIfUnsupported = nullptr); - -bool IsL2NormalizationSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsMergerSupportedNeon(const std::vector<const TensorInfo*> inputs, - const OriginsDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsMultiplicationSupportedNeon(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsNormalizationSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - const NormalizationDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsOutputSupportedNeon(const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsPermuteSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - const PermuteDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsPooling2dSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - const 
Pooling2dDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsResizeBilinearSupportedNeon(const TensorInfo& input, - std::string* reasonIfUnsupported = nullptr); - -bool IsSoftmaxSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - const SoftmaxDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsSplitterSupportedNeon(const TensorInfo& input, - const ViewsDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsFakeQuantizationSupportedNeon(const TensorInfo& input, - const FakeQuantizationDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsReshapeSupportedNeon(const TensorInfo& input, - std::string* reasonIfUnsupported = nullptr); - -bool IsFloorSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsLstmSupportedNeon(const TensorInfo& input, const TensorInfo& outputStateIn, - const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, - const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, - const TensorInfo& output, const LstmDescriptor& descriptor, - const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, - const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights, - const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, - const TensorInfo& forgetGateBias, const TensorInfo& cellBias, - const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, - const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, - const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, - const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, - const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported = nullptr); - -bool IsConvertFp16ToFp32SupportedNeon(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsConvertFp32ToFp16SupportedNeon(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsMeanSupportedNeon(const TensorInfo& input, - const TensorInfo& output, - const MeanDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -} diff --git a/src/armnn/backends/NeonTensorHandle.hpp b/src/armnn/backends/NeonTensorHandle.hpp deleted file mode 100644 index e385c83967..0000000000 --- a/src/armnn/backends/NeonTensorHandle.hpp +++ /dev/null @@ -1,137 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#pragma once - -#include "OutputHandler.hpp" -#include "ArmComputeTensorUtils.hpp" - -#include <arm_compute/runtime/MemoryGroup.h> -#include <arm_compute/runtime/IMemoryGroup.h> -#include <arm_compute/runtime/Tensor.h> -#include <arm_compute/runtime/SubTensor.h> -#include <arm_compute/core/TensorShape.h> -#include <arm_compute/core/Coordinates.h> - -#include <boost/polymorphic_pointer_cast.hpp> - -namespace armnn -{ - -class INeonTensorHandle : public ITensorHandle -{ -public: - virtual arm_compute::ITensor& GetTensor() = 0; - virtual arm_compute::ITensor const& GetTensor() const = 0; - virtual arm_compute::DataType GetDataType() const = 0; - virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) = 0; -}; - -class NeonTensorHandle : public INeonTensorHandle -{ -public: - NeonTensorHandle(const TensorInfo& tensorInfo) - { - armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo); - } - - arm_compute::ITensor& GetTensor() override { return m_Tensor; } - arm_compute::ITensor const& GetTensor() const override { return m_Tensor; } - - virtual void Allocate() override - { - armnn::armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_Tensor); - }; - - virtual void Manage() override - { - BOOST_ASSERT(m_MemoryGroup != nullptr); - m_MemoryGroup->manage(&m_Tensor); - } - - virtual ITensorHandle::Type GetType() const override { return ITensorHandle::Neon; } - - virtual ITensorHandle* GetParent() const override { return nullptr; } - - virtual arm_compute::DataType GetDataType() const override - { - return m_Tensor.info()->data_type(); - } - - virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override - { - m_MemoryGroup = boost::polymorphic_pointer_downcast<arm_compute::MemoryGroup>(memoryGroup); - } - - virtual const void* Map(bool /* blocking = true */) const override - { - return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); - } - virtual void Unmap() const override {} - - - TensorShape GetStrides() const override - { - return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); - } - - TensorShape GetShape() const override - { - return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); - } - -private: - arm_compute::Tensor m_Tensor; - std::shared_ptr<arm_compute::MemoryGroup> m_MemoryGroup; -}; - -class NeonSubTensorHandle : public INeonTensorHandle -{ -public: - NeonSubTensorHandle(INeonTensorHandle* parent, - const arm_compute::TensorShape& shape, - const arm_compute::Coordinates& coords) - : m_Tensor(&parent->GetTensor(), shape, coords) - { - parentHandle = parent; - } - - arm_compute::ITensor& GetTensor() override { return m_Tensor; } - arm_compute::ITensor const& GetTensor() const override { return m_Tensor; } - - virtual void Allocate() override {} - virtual void Manage() override {} - - virtual ITensorHandle::Type GetType() const override { return ITensorHandle::Neon; } - - virtual ITensorHandle* GetParent() const override { return parentHandle; } - - virtual arm_compute::DataType GetDataType() const override - { - return m_Tensor.info()->data_type(); - } - - virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>&) override {} - - virtual const void* Map(bool /* blocking = true */) const override - { - return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); - } - virtual void Unmap() const override {} - - 
TensorShape GetStrides() const override - { - return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); - } - - TensorShape GetShape() const override - { - return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); - } -private: - arm_compute::SubTensor m_Tensor; - ITensorHandle* parentHandle = nullptr; -}; - -} diff --git a/src/armnn/backends/NeonWorkloadFactory.cpp b/src/armnn/backends/NeonWorkloadFactory.cpp deleted file mode 100644 index 80ce0b918e..0000000000 --- a/src/armnn/backends/NeonWorkloadFactory.cpp +++ /dev/null @@ -1,479 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#include "NeonWorkloadFactory.hpp" -#include "armnn/Utils.hpp" -#include "CpuTensorHandle.hpp" -#include "Layer.hpp" - -#ifdef ARMCOMPUTENEON_ENABLED -#include "arm_compute/runtime/Allocator.h" - -#include "MemCopyWorkload.hpp" -#include "NeonTensorHandle.hpp" -#include "NeonWorkloadUtils.hpp" -#include "NeonWorkloads.hpp" - -#include "memory/IPoolManager.hpp" -#endif - -#include "MakeWorkloadHelper.hpp" - -#include <boost/polymorphic_cast.hpp> - -namespace armnn -{ - -bool NeonWorkloadFactory::IsLayerSupported(const Layer& layer, boost::optional<DataType> dataType, - std::string& outReasonIfUnsupported) -{ - return IWorkloadFactory::IsLayerSupported(Compute::CpuAcc, layer, dataType, outReasonIfUnsupported); -} - -#ifdef ARMCOMPUTENEON_ENABLED - -NeonWorkloadFactory::NeonWorkloadFactory() - : m_MemoryManager(std::make_unique<arm_compute::Allocator>(), BaseMemoryManager::MemoryAffinity::Offset) -{ -} - -std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, - TensorShape const& subTensorShape, - unsigned int const* subTensorOrigin) const -{ - BOOST_ASSERT(parent.GetType() == ITensorHandle::Neon); - - const arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape); - - arm_compute::Coordinates coords; - coords.set_num_dimensions(subTensorShape.GetNumDimensions()); - for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++) - { - // Arm compute indexes tensor coords in reverse order. 
- unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1; - coords.set(i, boost::numeric_cast<int>(subTensorOrigin[revertedIndex])); - } - - return std::make_unique<NeonSubTensorHandle>( - boost::polymorphic_downcast<INeonTensorHandle*>(&parent), shape, coords); -} - -std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const -{ - auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo); - tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup()); - - return tensorHandle; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<NeonActivationFloatWorkload, NeonActivationUint8Workload>(descriptor, info); -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<NeonSoftmaxFloatWorkload, NeonSoftmaxUint8Workload>(descriptor, info, - m_MemoryManager.GetIntraLayerManager()); -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<NeonSplitterFloatWorkload, NeonSplitterUint8Workload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<NeonMergerFloatWorkload, NeonMergerUint8Workload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateFullyConnected( - const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<NeonFullyConnectedFloatWorkload, NullWorkload>(descriptor, info, - m_MemoryManager.GetIntraLayerManager()); -} - -std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<NeonPermuteFloatWorkload, NeonPermuteUint8Workload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<NeonPooling2dFloatWorkload, NeonPooling2dUint8Workload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d( - const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<NeonConvolution2dFloatWorkload, NeonConvolution2dUint8Workload>(descriptor, info, - m_MemoryManager.GetIntraLayerManager()); -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d( - const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<NeonDepthwiseConvolutionFloatWorkload, NeonDepthwiseConvolutionUint8Workload>( - descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateNormalization( - 
const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<NeonNormalizationFloatWorkload, NullWorkload>(descriptor, info, - m_MemoryManager.GetIntraLayerManager()); -} - -std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<NeonAdditionFloatWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMultiplication( - const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<NeonMultiplicationFloatWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateDivision( - const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateSubtraction( - const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<NeonSubtractionFloatWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateBatchNormalization( - const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<NeonBatchNormalizationFloatWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0]) - { - throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemCopy workload"); - } - - return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateResizeBilinear( - const ResizeBilinearQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFakeQuantization( - const FakeQuantizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<NeonL2NormalizationFloatWorkload, NullWorkload>(descriptor, info, - m_MemoryManager.GetIntraLayerManager()); -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<NeonConstantFloatWorkload, NeonConstantUint8Workload>(descriptor, info); -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<NeonReshapeFloatWorkload, NeonReshapeUint8Workload>(descriptor, info); -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<NeonFloorFloatWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<NeonLstmFloatWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp16ToFp32( - const 
ConvertFp16ToFp32QueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return std::make_unique<NeonConvertFp16ToFp32Workload>(descriptor, info); -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp32ToFp16( - const ConvertFp32ToFp16QueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return std::make_unique<NeonConvertFp32ToFp16Workload>(descriptor, info); -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); -} - -void NeonWorkloadFactory::Finalize() -{ - m_MemoryManager.Finalize(); -} - -void NeonWorkloadFactory::Release() -{ - m_MemoryManager.Release(); -} - -void NeonWorkloadFactory::Acquire() -{ - m_MemoryManager.Acquire(); -} - -#else // Compiled without ArmCompute libs - -NeonWorkloadFactory::NeonWorkloadFactory() -{ -} - -std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, - TensorShape const& subTensorShape, - unsigned int const* subTensorOrigin) const -{ - return nullptr; -} - -std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d( - const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - 
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateBatchNormalization(const BatchNormalizationQueueDescriptor& data, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& data, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFakeQuantization( - const FakeQuantizationQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp16ToFp32( - const ConvertFp16ToFp32QueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp32ToFp16( - const ConvertFp32ToFp16QueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& data, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& data, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -void NeonWorkloadFactory::Finalize() -{} - -void NeonWorkloadFactory::Release() -{} - -void NeonWorkloadFactory::Acquire() -{} - -#endif - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloadFactory.hpp b/src/armnn/backends/NeonWorkloadFactory.hpp deleted file mode 100644 index a981855314..0000000000 --- a/src/armnn/backends/NeonWorkloadFactory.hpp +++ /dev/null @@ -1,135 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#pragma once - -#include "OutputHandler.hpp" - -#include "memory/BaseMemoryManager.hpp" - -#include <boost/core/ignore_unused.hpp> -#include <boost/optional.hpp> - -namespace armnn -{ - -// Neon workload factory. -class NeonWorkloadFactory : public IWorkloadFactory -{ -public: - NeonWorkloadFactory(); - - virtual Compute GetCompute() const override { return Compute::CpuAcc; } - - static bool IsLayerSupported(const Layer& layer, boost::optional<DataType> dataType, - std::string& outReasonIfUnsupported); - - virtual bool SupportsSubTensors() const override { return true; } - - virtual std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent, - TensorShape const& subTensorShape, - unsigned int const* subTensorOrigin) const override; - - virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override; - - virtual std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateOutput(const OutputQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateActivation(const ActivationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateSplitter(const SplitterQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateMerger(const MergerQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreatePermute(const PermuteQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateDepthwiseConvolution2d( - const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateNormalization(const NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateAddition(const AdditionQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - 
virtual std::unique_ptr<IWorkload> CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateConstant(const ConstantQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateLstm(const LstmQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateDivision(const DivisionQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateSubtraction(const SubtractionQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateMean(const MeanQueueDescriptor& descriptor, - const WorkloadInfo& Info) const override; - - virtual std::unique_ptr<IWorkload> CreatePad(const PadQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual void Finalize() override; - - virtual void Release() override; - - virtual void Acquire() override; - -private: -#ifdef ARMCOMPUTENEON_ENABLED - mutable NeonMemoryManager m_MemoryManager; -#endif -}; - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloadUtils.cpp b/src/armnn/backends/NeonWorkloadUtils.cpp deleted file mode 100644 index 010299f3b3..0000000000 --- a/src/armnn/backends/NeonWorkloadUtils.cpp +++ /dev/null @@ -1,60 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#include "NeonWorkloadUtils.hpp" -#include "backends/ArmComputeTensorUtils.hpp" -#include "backends/ArmComputeUtils.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/NeonTensorHandle.hpp" - -#include "armnn/Utils.hpp" -#include "armnn/Exceptions.hpp" - -#include <cstring> -#include <boost/assert.hpp> -#include <boost/cast.hpp> -#include <boost/format.hpp> - -#include "Profiling.hpp" - -#include "NeonLayerSupport.hpp" -#include "../../../include/armnn/Types.hpp" -#include "Half.hpp" - -using namespace armnn::armcomputetensorutils; - -namespace armnn -{ - -// Allocates a tensor and copy the contents in data to the tensor contents. 
-template<typename T> -void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const T* data) -{ - InitialiseArmComputeTensorEmpty(tensor); - CopyArmComputeITensorData(data, tensor); -} - -template void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const Half* data); -template void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const float* data); -template void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const uint8_t* data); -template void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const int32_t* data); - -void InitializeArmComputeTensorDataForFloatTypes(arm_compute::Tensor& tensor, - const ConstCpuTensorHandle* handle) -{ - BOOST_ASSERT(handle); - switch(handle->GetTensorInfo().GetDataType()) - { - case DataType::Float16: - InitialiseArmComputeTensorData(tensor, handle->GetConstTensor<Half>()); - break; - case DataType::Float32: - InitialiseArmComputeTensorData(tensor, handle->GetConstTensor<float>()); - break; - default: - BOOST_ASSERT_MSG(false, "Unexpected floating point type."); - } -}; - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloadUtils.hpp b/src/armnn/backends/NeonWorkloadUtils.hpp deleted file mode 100644 index 15f9e3badf..0000000000 --- a/src/armnn/backends/NeonWorkloadUtils.hpp +++ /dev/null @@ -1,34 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include "Workload.hpp" - -#include "backends/NeonTensorHandle.hpp" -#include "NeonTimer.hpp" - -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/runtime/NEON/NEFunctions.h" -#include <arm_compute/runtime/SubTensor.h> - -#include <boost/cast.hpp> - -namespace armnn -{ -class Layer; - -template<typename T> -void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const T* data); - -void InitializeArmComputeTensorDataForFloatTypes(arm_compute::Tensor& tensor, const ConstCpuTensorHandle* handle); -} //namespace armnn - - -#define ARMNN_SCOPED_PROFILING_EVENT_NEON(name) \ - ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::CpuAcc, \ - name, \ - armnn::WallClockTimer(), \ - armnn::NeonTimer()) diff --git a/src/armnn/backends/NeonWorkloads.hpp b/src/armnn/backends/NeonWorkloads.hpp deleted file mode 100644 index 676c23cc4d..0000000000 --- a/src/armnn/backends/NeonWorkloads.hpp +++ /dev/null @@ -1,41 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once -#include "backends/NeonWorkloads/NeonActivationFloatWorkload.hpp" -#include "backends/NeonWorkloads/NeonActivationUint8Workload.hpp" -#include "backends/NeonWorkloads/NeonAdditionFloatWorkload.hpp" -#include "backends/NeonWorkloads/NeonBaseConstantWorkload.hpp" -#include "backends/NeonWorkloads/NeonBaseMergerWorkload.hpp" -#include "backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp" -#include "backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.hpp" -#include "backends/NeonWorkloads/NeonConstantFloatWorkload.hpp" -#include "backends/NeonWorkloads/NeonConstantUint8Workload.hpp" -#include "backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp" -#include "backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp" -#include "backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp" -#include "backends/NeonWorkloads/NeonConvolution2dFloatWorkload.hpp" -#include "backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp" -#include "backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.hpp" -#include "backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp" -#include "backends/NeonWorkloads/NeonFloorFloatWorkload.hpp" -#include "backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp" -#include "backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.hpp" -#include "backends/NeonWorkloads/NeonLstmFloatWorkload.hpp" -#include "backends/NeonWorkloads/NeonMergerFloatWorkload.hpp" -#include "backends/NeonWorkloads/NeonMergerUint8Workload.hpp" -#include "backends/NeonWorkloads/NeonMultiplicationFloatWorkload.hpp" -#include "backends/NeonWorkloads/NeonNormalizationFloatWorkload.hpp" -#include "backends/NeonWorkloads/NeonPermuteWorkload.hpp" -#include "backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp" -#include "backends/NeonWorkloads/NeonPooling2dFloatWorkload.hpp" -#include "backends/NeonWorkloads/NeonPooling2dUint8Workload.hpp" -#include "backends/NeonWorkloads/NeonReshapeFloatWorkload.hpp" -#include "backends/NeonWorkloads/NeonReshapeUint8Workload.hpp" -#include "backends/NeonWorkloads/NeonSoftmaxFloatWorkload.hpp" -#include "backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp" -#include "backends/NeonWorkloads/NeonSplitterFloatWorkload.hpp" -#include "backends/NeonWorkloads/NeonSplitterUint8Workload.hpp" -#include "backends/NeonWorkloads/NeonSubtractionFloatWorkload.hpp" diff --git a/src/armnn/backends/NeonWorkloads/NeonActivationFloatWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonActivationFloatWorkload.cpp deleted file mode 100644 index bedf3dcb02..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonActivationFloatWorkload.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "NeonActivationFloatWorkload.hpp" -#include "backends/ArmComputeUtils.hpp" - - -namespace armnn -{ - -arm_compute::Status NeonActivationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const ActivationDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - const arm_compute::ActivationLayerInfo activationLayerInfo = - ConvertActivationDescriptorToAclActivationLayerInfo(descriptor); - - if (input.GetDataType() == DataType::QuantisedAsymm8 && - activationLayerInfo.activation() == arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC) - { - return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, - "Neon: Logistic Activations unsupported with QAsymm8 data type."}; - } - - return arm_compute::NEActivationLayer::validate(&aclInput, - &aclOutput, - activationLayerInfo); -} - -NeonActivationFloatWorkload::NeonActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload<ActivationQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("NeonActivationFloatWorkload", 1, 1); - - const arm_compute::ActivationLayerInfo activationLayerInfo = - ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters); - - arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_ActivationLayer.configure(&input, &output, activationLayerInfo); -} - -void NeonActivationFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonActivationFloatWorkload_Execute"); - m_ActivationLayer.run(); -} - -} //namespace armnn - diff --git a/src/armnn/backends/NeonWorkloads/NeonActivationFloatWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonActivationFloatWorkload.hpp deleted file mode 100644 index f8d25ca47d..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonActivationFloatWorkload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/NeonWorkloadUtils.hpp> - -namespace armnn -{ - -arm_compute::Status NeonActivationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const ActivationDescriptor& descriptor); - -class NeonActivationFloatWorkload : public FloatWorkload<ActivationQueueDescriptor> -{ -public: - NeonActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::NEActivationLayer m_ActivationLayer; -}; -} //namespace armnn - - - diff --git a/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.cpp deleted file mode 100644 index a9b94d2916..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.cpp +++ /dev/null @@ -1,35 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "NeonActivationUint8Workload.hpp" -#include "backends/ArmComputeUtils.hpp" -#include "backends/NeonLayerSupport.hpp" - -namespace armnn -{ -NeonActivationUint8Workload::NeonActivationUint8Workload(const ActivationQueueDescriptor& descriptor, - const WorkloadInfo& info) - : Uint8Workload<ActivationQueueDescriptor>(descriptor, info) -{ - auto activation = ConvertActivationFunctionToAclActivationFunction(m_Data.m_Parameters.m_Function); - arm_compute::ActivationLayerInfo layerInfo(activation, - m_Data.m_Parameters.m_A, - m_Data.m_Parameters.m_B); - - m_Data.ValidateInputsOutputs("NeonActivationUint8Workload", 1, 1); - - arm_compute::ITensor& input = static_cast<NeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& output = static_cast<NeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_ActivationLayer.configure(&input, &output, layerInfo); -} - -void NeonActivationUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonActivationUint8Workload_Execute"); - - m_ActivationLayer.run(); -} -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.hpp deleted file mode 100644 index 405e600691..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/NeonWorkloadUtils.hpp> - -namespace armnn -{ - -class NeonActivationUint8Workload : public Uint8Workload<ActivationQueueDescriptor> -{ -public: - NeonActivationUint8Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable arm_compute::NEActivationLayer m_ActivationLayer; -}; - -} //namespace armnn - - - - - diff --git a/src/armnn/backends/NeonWorkloads/NeonAdditionFloatWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonAdditionFloatWorkload.cpp deleted file mode 100644 index adc34e91c4..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonAdditionFloatWorkload.cpp +++ /dev/null @@ -1,48 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "NeonAdditionFloatWorkload.hpp" -#include "backends/ArmComputeTensorUtils.hpp" -#include "backends/CpuTensorHandle.hpp" - -namespace armnn -{ - -arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); - const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); - const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - return arm_compute::NEArithmeticAddition::validate(&aclInput0, - &aclInput1, - &aclOutput, - arm_compute::ConvertPolicy::SATURATE); -} - - -NeonAdditionFloatWorkload::NeonAdditionFloatWorkload(const AdditionQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload<AdditionQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("NeonAdditionFloatWorkload", 2, 1); - - arm_compute::ITensor& input1 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& input2 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); - arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_AddLayer.configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE); -} - -void NeonAdditionFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonAdditionFloatWorkload_Execute"); - m_AddLayer.run(); -} - -} //namespace armnn - diff --git a/src/armnn/backends/NeonWorkloads/NeonAdditionFloatWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonAdditionFloatWorkload.hpp deleted file mode 100644 index 154e4f33d1..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonAdditionFloatWorkload.hpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/NeonWorkloadUtils.hpp> - -namespace armnn -{ - -arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output); - -class NeonAdditionFloatWorkload : public FloatWorkload<AdditionQueueDescriptor> -{ -public: - NeonAdditionFloatWorkload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable arm_compute::NEArithmeticAddition m_AddLayer; -}; - -} //namespace armnn - - - diff --git a/src/armnn/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp deleted file mode 100644 index f4a09d4aed..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp +++ /dev/null @@ -1,83 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <arm_compute/core/Types.h> -#include <backends/ArmComputeTensorUtils.hpp> -#include <backends/CpuTensorHandle.hpp> -#include <backends/NeonTensorHandle.hpp> -#include <backends/NeonWorkloadUtils.hpp> -#include <backends/Workload.hpp> -#include <Half.hpp> - -#include <boost/cast.hpp> -#include "Half.hpp" - -namespace armnn -{ - -// Base class template providing an implementation of the Constant layer common to all data types. -template <armnn::DataType... 
DataFormats> -class NeonBaseConstantWorkload : public TypedWorkload<ConstantQueueDescriptor, DataFormats...> -{ -public: - NeonBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) - : TypedWorkload<ConstantQueueDescriptor, DataFormats...>(descriptor, info) - , m_RanOnce(false) - { - } - - virtual void Execute() const override - { - using namespace armcomputetensorutils; - - // The intermediate tensor held by the corresponding layer output handler can be initialised with the - // given data on the first inference, then reused for subsequent inferences. - // The initialisation cannot happen at workload construction time since the ACL kernel for the next layer - // may not have been configured at the time. - if (!m_RanOnce) - { - const ConstantQueueDescriptor& data = this->m_Data; - - BOOST_ASSERT(data.m_LayerOutput != nullptr); - arm_compute::ITensor& output = - boost::polymorphic_downcast<NeonTensorHandle*>(data.m_Outputs[0])->GetTensor(); - arm_compute::DataType computeDataType = - boost::polymorphic_downcast<NeonTensorHandle*>(data.m_Outputs[0])->GetDataType(); - - switch (computeDataType) - { - case arm_compute::DataType::F16: - { - CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor<Half>(), output); - break; - } - case arm_compute::DataType::F32: - { - CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor<float>(), output); - break; - } - case arm_compute::DataType::QASYMM8: - { - CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor<uint8_t>(), output); - break; - } - default: - { - BOOST_ASSERT_MSG(false, "Unknown data type"); - break; - } - } - - m_RanOnce = true; - } - } - -private: - mutable bool m_RanOnce; -}; - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp deleted file mode 100644 index 603e7f3544..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp +++ /dev/null @@ -1,26 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/NeonWorkloadUtils.hpp> -#include <backends/Workload.hpp> - -namespace armnn -{ -// Base class template providing an implementation of the Merger layer common to all data types. -template <armnn::DataType... DataTypes> -class NeonBaseMergerWorkload : public TypedWorkload<MergerQueueDescriptor, DataTypes...> -{ -public: - using TypedWorkload<MergerQueueDescriptor, DataTypes...>::TypedWorkload; - - virtual void Execute() const override - { - // With subtensors, merger is a no-op. - } -}; - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp deleted file mode 100644 index 9288d4427e..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp +++ /dev/null @@ -1,27 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/Workload.hpp> -#include <backends/NeonWorkloadUtils.hpp> - -namespace armnn -{ - -// Base class template providing an implementation of the Splitter layer common to all data types. -template <armnn::DataType... 
DataTypes> -class NeonBaseSplitterWorkload : public TypedWorkload<SplitterQueueDescriptor, DataTypes...> -{ -public: - using TypedWorkload<SplitterQueueDescriptor, DataTypes...>::TypedWorkload; - - virtual void Execute() const override - { - // With subtensors, splitter is a no-op. - } -}; - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.cpp deleted file mode 100644 index f47dc0498a..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.cpp +++ /dev/null @@ -1,96 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonBatchNormalizationFloatWorkload.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/ArmComputeTensorUtils.hpp" -#include "../../../../include/armnn/ArmNN.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - - -arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& mean, - const TensorInfo& var, - const TensorInfo& beta, - const TensorInfo& gamma, - const BatchNormalizationDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - const arm_compute::TensorInfo aclMeanInfo = BuildArmComputeTensorInfo(mean); - const arm_compute::TensorInfo aclVarInfo = BuildArmComputeTensorInfo(var); - const arm_compute::TensorInfo aclBetaInfo = BuildArmComputeTensorInfo(beta); - const arm_compute::TensorInfo aclGammaInfo = BuildArmComputeTensorInfo(gamma); - - return arm_compute::NEBatchNormalizationLayer::validate(&aclInputInfo, - &aclOutputInfo, - &aclMeanInfo, - &aclVarInfo, - &aclBetaInfo, - &aclGammaInfo, - descriptor.m_Eps); -} - -NeonBatchNormalizationFloatWorkload::NeonBatchNormalizationFloatWorkload( - const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) - : FloatWorkload<BatchNormalizationQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("NeonBatchNormalizationFloatWorkload", 1, 1); - - arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_Mean = std::make_unique<arm_compute::Tensor>(); - BuildArmComputeTensor(*m_Mean, m_Data.m_Mean->GetTensorInfo()); - - m_Variance = std::make_unique<arm_compute::Tensor>(); - BuildArmComputeTensor(*m_Variance, m_Data.m_Variance->GetTensorInfo()); - - m_Gamma = std::make_unique<arm_compute::Tensor>(); - BuildArmComputeTensor(*m_Gamma, m_Data.m_Gamma->GetTensorInfo()); - - m_Beta = std::make_unique<arm_compute::Tensor>(); - BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo()); - - m_Layer.configure(&input, - &output, - m_Mean.get(), - m_Variance.get(), - m_Beta.get(), - m_Gamma.get(), - m_Data.m_Parameters.m_Eps); - - InitializeArmComputeTensorDataForFloatTypes(*m_Mean, m_Data.m_Mean); - InitializeArmComputeTensorDataForFloatTypes(*m_Variance, m_Data.m_Variance); - InitializeArmComputeTensorDataForFloatTypes(*m_Gamma, m_Data.m_Gamma); - InitializeArmComputeTensorDataForFloatTypes(*m_Beta, m_Data.m_Beta); - - // Force Compute Library to perform the necessary copying and reshaping, after which - // delete all the input tensors that will no longer be needed - 
m_Layer.prepare(); - FreeUnusedTensors(); -} - -void NeonBatchNormalizationFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonBatchNormalizationFloatWorkload_Execute"); - m_Layer.run(); -} - -void NeonBatchNormalizationFloatWorkload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_Mean); - FreeTensorIfUnused(m_Variance); - FreeTensorIfUnused(m_Gamma); - FreeTensorIfUnused(m_Beta); -} - -} //namespace armnn - - diff --git a/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.hpp deleted file mode 100644 index 7982541748..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.hpp +++ /dev/null @@ -1,42 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/NeonWorkloadUtils.hpp> - -namespace armnn -{ - -arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& mean, - const TensorInfo& var, - const TensorInfo& beta, - const TensorInfo& gamma, - const BatchNormalizationDescriptor& descriptor); - -class NeonBatchNormalizationFloatWorkload : public FloatWorkload<BatchNormalizationQueueDescriptor> -{ -public: - NeonBatchNormalizationFloatWorkload(const BatchNormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable arm_compute::NEBatchNormalizationLayer m_Layer; - - std::unique_ptr<arm_compute::Tensor> m_Mean; - std::unique_ptr<arm_compute::Tensor> m_Variance; - std::unique_ptr<arm_compute::Tensor> m_Gamma; - std::unique_ptr<arm_compute::Tensor> m_Beta; - - void FreeUnusedTensors(); -}; - -} //namespace armnn - - - diff --git a/src/armnn/backends/NeonWorkloads/NeonConstantFloatWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonConstantFloatWorkload.cpp deleted file mode 100644 index dbdd057101..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonConstantFloatWorkload.cpp +++ /dev/null @@ -1,17 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonConstantFloatWorkload.hpp" - -namespace armnn -{ - -void NeonConstantFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConstantFloatWorkload_Execute"); - NeonBaseConstantWorkload::Execute(); -} - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonConstantFloatWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonConstantFloatWorkload.hpp deleted file mode 100644 index c35b5fda3e..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonConstantFloatWorkload.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "NeonBaseConstantWorkload.hpp" - -namespace armnn -{ - -class NeonConstantFloatWorkload : public NeonBaseConstantWorkload<DataType::Float16, DataType::Float32> -{ -public: - using NeonBaseConstantWorkload<DataType::Float16, DataType::Float32>::NeonBaseConstantWorkload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.cpp deleted file mode 100644 index c607d86844..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.cpp +++ /dev/null @@ -1,17 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "NeonConstantUint8Workload.hpp" - -namespace armnn -{ - -void NeonConstantUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConstantUint8Workload_Execute"); - NeonBaseConstantWorkload::Execute(); -} - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.hpp deleted file mode 100644 index 2cb9516afe..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "NeonBaseConstantWorkload.hpp" - -namespace armnn -{ - -class NeonConstantUint8Workload : public NeonBaseConstantWorkload<DataType::QuantisedAsymm8> -{ -public: - using NeonBaseConstantWorkload<DataType::QuantisedAsymm8>::NeonBaseConstantWorkload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.cpp deleted file mode 100644 index 86ec31c71d..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonConvertFp16ToFp32Workload.hpp" -#include "Half.hpp" -#include "FloatingPointConverter.hpp" - -#include "backends/WorkloadUtils.hpp" - -namespace armnn -{ - -NeonConvertFp16ToFp32Workload::NeonConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, - const WorkloadInfo& info) - : Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>(descriptor, info) -{ - this->m_Data.ValidateInputsOutputs("NeonConvertFp16ToFp32Workload", 1, 1); - GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); -} - -void NeonConvertFp16ToFp32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp16ToFp32Workload_Execute"); - - auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size) - { - auto input = reinterpret_cast<const Half*>(src); - auto output = reinterpret_cast<float*>(dst); - size_t numElements = size/2; // 2 bytes per fp16 - armnnUtils::FloatingPointConverter::ConvertFloat16To32(input, numElements, output); - }; - - for (const auto& pair : m_TensorHandlePairs) - { - CopyTensorContentsGeneric(pair.first, pair.second, convertFunc); - } -} - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp deleted file mode 100644 index d70401b5f2..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp +++ /dev/null @@ -1,26 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" -#include "backends/NeonWorkloadUtils.hpp" - -namespace armnn -{ - -class NeonConvertFp16ToFp32Workload : public Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor> -{ -public: - NeonConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; - -private: - using TensorHandlePair = std::pair<const ITensorHandle*, ITensorHandle*>; - std::vector<TensorHandlePair> m_TensorHandlePairs; -}; - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.cpp deleted file mode 100644 index 0f4fbe4e93..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonConvertFp32ToFp16Workload.hpp" - -#include "Half.hpp" -#include "FloatingPointConverter.hpp" - -#include "Profiling.hpp" -#include "backends/WorkloadUtils.hpp" - -namespace armnn -{ - -NeonConvertFp32ToFp16Workload::NeonConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, - const WorkloadInfo& info) - : Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>(descriptor, info) -{ - this->m_Data.ValidateInputsOutputs("NeonConvertFp32ToFp16Workload", 1, 1); - GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); -} - -void NeonConvertFp32ToFp16Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp32ToFp16Workload_Execute"); - - auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size) - { - auto input = reinterpret_cast<const float*>(src); - auto output = reinterpret_cast<Half*>(dst); - size_t numElements = size/2; // 2 bytes per fp16 - armnnUtils::FloatingPointConverter::ConvertFloat32To16(input, numElements, output); - }; - - for (const auto& pair : m_TensorHandlePairs) - { - CopyTensorContentsGeneric(pair.first, pair.second, convertFunc); - } -} - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp deleted file mode 100644 index eb839fdd9d..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp +++ /dev/null @@ -1,26 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" -#include "backends/NeonWorkloadUtils.hpp" - -namespace armnn -{ - -class NeonConvertFp32ToFp16Workload : public Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor> -{ -public: - NeonConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; - -private: - using TensorHandlePair = std::pair<const ITensorHandle*, ITensorHandle*>; - std::vector<TensorHandlePair> m_TensorHandlePairs; -}; - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp deleted file mode 100644 index 0e9894ce78..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp +++ /dev/null @@ -1,146 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. 
All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "backends/CpuTensorHandle.hpp" -#include "backends/ArmComputeTensorUtils.hpp" -#include "backends/NeonLayerSupport.hpp" - -#include "NeonConvolution2dBaseWorkload.hpp" - -#include "armnn/Types.hpp" -#include "Half.hpp" - -namespace armnn -{ - -using namespace armcomputetensorutils; - -arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional<TensorInfo>& biases) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights); - - arm_compute::TensorInfo aclBiasesInfo; - arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; - - if (descriptor.m_BiasEnabled) - { - BOOST_ASSERT(biases.is_initialized()); - - aclBiasesInfo = BuildArmComputeTensorInfo(biases.get()); - optionalAclBiasesInfo = &aclBiasesInfo; - } - - arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor); - - return arm_compute::NEConvolutionLayer::validate(&aclInputInfo, - &aclWeightsInfo, - optionalAclBiasesInfo, - &aclOutputInfo, - layerInfo); -} - -template<armnn::DataType... dataTypes> -NeonConvolution2dBaseWorkload<dataTypes...>::NeonConvolution2dBaseWorkload( - const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) - : TypedWorkload<Convolution2dQueueDescriptor, dataTypes...>(descriptor, info) -{ - using arm_compute::NEDirectConvolutionLayer; - - ValidateData(); - - // todo: check tensor shapes match. 
- - arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_KernelTensor = std::make_unique<arm_compute::Tensor>(); - BuildArmComputeTensor(*m_KernelTensor, m_Data.m_Weight->GetTensorInfo()); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - m_BiasTensor = std::make_unique<arm_compute::Tensor>(); - BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); - } - - arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, - m_Data.m_Parameters.m_StrideY, - m_Data.m_Parameters.m_PadLeft, - m_Data.m_Parameters.m_PadRight, - m_Data.m_Parameters.m_PadTop, - m_Data.m_Parameters.m_PadBottom, - arm_compute::DimensionRoundingType::FLOOR); - - const bool preferDirectConvolution = - IsNeonDirectConvolutionPreferred(m_Data.m_Weight->GetTensorInfo(), - m_Data.m_Parameters); - - if (preferDirectConvolution) - { - auto directConvolutionLayer = std::make_unique<arm_compute::NEDirectConvolutionLayer>(memoryManager); - directConvolutionLayer->configure(&input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo); - m_ConvolutionLayer.reset(directConvolutionLayer.release()); - } - else - { - auto convolutionLayer = std::make_unique<arm_compute::NEConvolutionLayer>(memoryManager); - convolutionLayer->configure(&input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo); - m_ConvolutionLayer.reset(convolutionLayer.release()); - } - BOOST_ASSERT(m_ConvolutionLayer); - - armnn::DataType dataType = m_Data.m_Weight->GetTensorInfo().GetDataType(); - - switch (dataType) - { - case DataType::Float16: - { - InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor<Half>()); - break; - } - case DataType::Float32: - { - InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor<float>()); - break; - } - case DataType::QuantisedAsymm8: - { - InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor<uint8_t>()); - break; - } - default: - { - BOOST_ASSERT_MSG(false, "Unknown DataType."); - } - } -} - -template<armnn::DataType... dataTypes> -void NeonConvolution2dBaseWorkload<dataTypes...>::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_KernelTensor); - FreeTensorIfUnused(m_BiasTensor); -} - -// Generates known implementations for linker. -template class NeonConvolution2dBaseWorkload<armnn::DataType::Float16, armnn::DataType::Float32>; -template class NeonConvolution2dBaseWorkload<armnn::DataType::QuantisedAsymm8>; - -} //namespace armnn - diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp deleted file mode 100644 index 77d90cd84b..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp +++ /dev/null @@ -1,49 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/ArmComputeTensorUtils.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/NeonLayerSupport.hpp" -#include "backends/NeonWorkloadUtils.hpp" -#include "backends/Workload.hpp" - -#include "arm_compute/runtime/MemoryManagerOnDemand.h" - -#include <boost/optional.hpp> - -#include <memory> - -namespace armnn -{ - -arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional<TensorInfo>& biases); - -template<armnn::DataType... dataTypes> -class NeonConvolution2dBaseWorkload : public TypedWorkload<Convolution2dQueueDescriptor, dataTypes...> -{ -public: - using TypedWorkload<Convolution2dQueueDescriptor, dataTypes...>::m_Data; - - NeonConvolution2dBaseWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); - - virtual void ValidateData() const {}; - -protected: - std::unique_ptr<arm_compute::IFunction> m_ConvolutionLayer; - - std::unique_ptr<arm_compute::Tensor> m_KernelTensor; - std::unique_ptr<arm_compute::Tensor> m_BiasTensor; - - void FreeUnusedTensors(); -}; - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.cpp deleted file mode 100644 index ca7a0c575a..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.cpp +++ /dev/null @@ -1,40 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonConvolution2dFloatWorkload.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/ArmComputeTensorUtils.hpp" -#include "backends/NeonLayerSupport.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -NeonConvolution2dFloatWorkload::NeonConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) - : NeonConvolution2dBaseWorkload(descriptor, info, memoryManager) -{ - if (m_Data.m_Parameters.m_BiasEnabled) - { - InitializeArmComputeTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias); - } - - m_ConvolutionLayer->prepare(); - FreeUnusedTensors(); -} - -void NeonConvolution2dFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvolution2dFloatWorkload_Execute"); - m_ConvolutionLayer->run(); -} - -void NeonConvolution2dFloatWorkload::ValidateData() const -{ - m_Data.ValidateInputsOutputs("NeonConvolution2dFloatWorkload", 1, 1); -} - -} //namespace armnn - diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.hpp deleted file mode 100644 index dd8ef55f43..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "NeonConvolution2dBaseWorkload.hpp" -#include <backends/NeonWorkloadUtils.hpp> - -#include "arm_compute/runtime/MemoryManagerOnDemand.h" - -#include <memory> - -namespace armnn -{ - -class NeonConvolution2dFloatWorkload : public NeonConvolution2dBaseWorkload<DataType::Float16, DataType::Float32> -{ -public: - NeonConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); - - void Execute() const override; - void ValidateData() const override; -}; - -} //namespace armnn - diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp deleted file mode 100644 index 5affe682b4..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp +++ /dev/null @@ -1,35 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonConvolution2dUint8Workload.hpp" - -namespace armnn -{ - -NeonConvolution2dUint8Workload::NeonConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) - : NeonConvolution2dBaseWorkload(descriptor, info, memoryManager) -{ - if (m_Data.m_Parameters.m_BiasEnabled) - { - InitialiseArmComputeTensorData(*m_BiasTensor, m_Data.m_Bias->template GetConstTensor<int32_t>()); - } - - m_ConvolutionLayer->prepare(); - FreeUnusedTensors(); -} - -void NeonConvolution2dUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvolution2dUint8Workload_Execute"); - m_ConvolutionLayer->run(); -} - -void NeonConvolution2dUint8Workload::ValidateData() const -{ - m_Data.ValidateInputsOutputs("NeonConvolution2dUint8Workload", 1, 1); -} - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp deleted file mode 100644 index ef60fc3e84..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "NeonConvolution2dBaseWorkload.hpp" - -#include "arm_compute/runtime/MemoryManagerOnDemand.h" - -#include <memory> - -namespace armnn -{ - -class NeonConvolution2dUint8Workload : public NeonConvolution2dBaseWorkload<DataType::QuantisedAsymm8> -{ -public: - NeonConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); - - virtual void ValidateData() const override; - virtual void Execute() const override; -private: -}; - -} //namespace armnnn - diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.cpp deleted file mode 100644 index e79e14f2ed..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "NeonDepthwiseConvolutionBaseWorkload.hpp" - -#include "backends/ArmComputeTensorUtils.hpp" - -namespace armnn -{ - -arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional<TensorInfo>& biases) -{ - const arm_compute::TensorInfo aclInputInfo = - armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = - armcomputetensorutils::BuildArmComputeTensorInfo(output); - const arm_compute::TensorInfo aclWeightsInfo = - armcomputetensorutils::BuildArmComputeTensorInfo(weights); - - arm_compute::TensorInfo aclBiasesInfo; - arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; - - if (descriptor.m_BiasEnabled) - { - BOOST_ASSERT(biases.is_initialized()); - - aclBiasesInfo = armcomputetensorutils::BuildArmComputeTensorInfo(biases.get()); - optionalAclBiasesInfo = &aclBiasesInfo; - } - - const arm_compute::PadStrideInfo aclPadStrideInfo = - armcomputetensorutils::BuildArmComputePadStrideInfo(descriptor); - const unsigned int aclDepthMultiplier = weights.GetShape()[0]; - - return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo, - &aclWeightsInfo, - optionalAclBiasesInfo, - &aclOutputInfo, - aclPadStrideInfo, - aclDepthMultiplier); -} - -}
\ No newline at end of file diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp deleted file mode 100644 index eec432be86..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/NeonWorkloadUtils.hpp" - -#include <boost/optional.hpp> - -namespace armnn -{ - -arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional<TensorInfo>& biases); - -} // namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.cpp deleted file mode 100644 index 1ec1417a58..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.cpp +++ /dev/null @@ -1,94 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonDepthwiseConvolutionFloatWorkload.hpp" -#include "backends/NeonLayerSupport.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/ArmComputeTensorUtils.hpp" - - -namespace armnn -{ -using namespace armcomputetensorutils; - -NeonDepthwiseConvolutionFloatWorkload::NeonDepthwiseConvolutionFloatWorkload( - const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info) -{ - const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); - - m_KernelTensor = std::make_unique<arm_compute::Tensor>(); - BuildArmComputeTensor(*m_KernelTensor, weightInfo); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - m_BiasTensor = std::make_unique<arm_compute::Tensor>(); - BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); - } - - arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, - m_Data.m_Parameters.m_StrideY, - m_Data.m_Parameters.m_PadLeft, - m_Data.m_Parameters.m_PadRight, - m_Data.m_Parameters.m_PadTop, - m_Data.m_Parameters.m_PadBottom, - arm_compute::DimensionRoundingType::FLOOR); - - m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionFloatWorkload", 1, 1); - - arm_compute::ITensor& input = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - bool use3x3Optimisation = weightInfo.GetShape()[3] == 3 && weightInfo.GetShape()[2] == 3; - if (use3x3Optimisation) - { - m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>(); - static_cast<arm_compute::NEDepthwiseConvolutionLayer3x3*>( - m_pDepthwiseConvolutionLayer.get())->configure(&input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo); - } - else - { - m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer>(); - static_cast<arm_compute::NEDepthwiseConvolutionLayer*>( - m_pDepthwiseConvolutionLayer.get())->configure(&input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo); - } - - BOOST_ASSERT(m_pDepthwiseConvolutionLayer); - - InitializeArmComputeTensorDataForFloatTypes(*m_KernelTensor, m_Data.m_Weight); - - if 
(m_BiasTensor) - { - InitializeArmComputeTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias); - } - - m_pDepthwiseConvolutionLayer->prepare(); - FreeUnusedTensors(); -} - -void NeonDepthwiseConvolutionFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionFloatWorkload_Execute"); - BOOST_ASSERT(m_pDepthwiseConvolutionLayer); - - m_pDepthwiseConvolutionLayer->run(); -} - -void NeonDepthwiseConvolutionFloatWorkload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_KernelTensor); - FreeTensorIfUnused(m_BiasTensor); -} - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.hpp deleted file mode 100644 index 4ec8c1dc37..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.hpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/NeonWorkloadUtils.hpp> - -namespace armnn -{ - -class NeonDepthwiseConvolutionFloatWorkload : public FloatWorkload<DepthwiseConvolution2dQueueDescriptor> -{ -public: - NeonDepthwiseConvolutionFloatWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer; - - std::unique_ptr<arm_compute::Tensor> m_KernelTensor; - std::unique_ptr<arm_compute::Tensor> m_BiasTensor; - - void FreeUnusedTensors(); -}; - -} //namespace armnn - - - - diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp deleted file mode 100644 index b7813a59c5..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp +++ /dev/null @@ -1,94 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "NeonDepthwiseConvolutionUint8Workload.hpp" -#include "backends/NeonLayerSupport.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/ArmComputeTensorUtils.hpp" - - -namespace armnn -{ -using namespace armcomputetensorutils; - -NeonDepthwiseConvolutionUint8Workload::NeonDepthwiseConvolutionUint8Workload( - const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) - : Uint8Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info) -{ - const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); - - m_KernelTensor = std::make_unique<arm_compute::Tensor>(); - BuildArmComputeTensor(*m_KernelTensor, weightInfo); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - m_BiasTensor = std::make_unique<arm_compute::Tensor>(); - BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); - } - - arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, - m_Data.m_Parameters.m_StrideY, - m_Data.m_Parameters.m_PadLeft, - m_Data.m_Parameters.m_PadRight, - m_Data.m_Parameters.m_PadTop, - m_Data.m_Parameters.m_PadBottom, - arm_compute::DimensionRoundingType::FLOOR); - - m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionUint8Workload", 1, 1); - - arm_compute::ITensor& input = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - bool use3x3Optimisation = weightInfo.GetShape()[3] == 3 && weightInfo.GetShape()[2] == 3; - if (use3x3Optimisation) - { - m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>(); - static_cast<arm_compute::NEDepthwiseConvolutionLayer3x3*>( - m_pDepthwiseConvolutionLayer.get())->configure(&input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo); - } - else - { - m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer>(); - static_cast<arm_compute::NEDepthwiseConvolutionLayer*>( - m_pDepthwiseConvolutionLayer.get())->configure(&input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo); - } - - BOOST_ASSERT(m_pDepthwiseConvolutionLayer); - - InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->GetConstTensor<uint8_t>()); - - if (m_BiasTensor) - { - InitialiseArmComputeTensorData(*m_BiasTensor, m_Data.m_Bias->GetConstTensor<int32_t>()); - } - - m_pDepthwiseConvolutionLayer->prepare(); - FreeUnusedTensors(); -} - -void NeonDepthwiseConvolutionUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionUint8Workload_Execute"); - BOOST_ASSERT(m_pDepthwiseConvolutionLayer); - - m_pDepthwiseConvolutionLayer->run(); -} - -void NeonDepthwiseConvolutionUint8Workload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_KernelTensor); - FreeTensorIfUnused(m_BiasTensor); -} - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp deleted file mode 100644 index a0be512f9b..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/NeonWorkloadUtils.hpp> - -namespace armnn -{ - -class NeonDepthwiseConvolutionUint8Workload : public Uint8Workload<DepthwiseConvolution2dQueueDescriptor> -{ -public: - NeonDepthwiseConvolutionUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer; - - std::unique_ptr<arm_compute::Tensor> m_KernelTensor; - std::unique_ptr<arm_compute::Tensor> m_BiasTensor; - - void FreeUnusedTensors(); -}; - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonFloorFloatWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonFloorFloatWorkload.cpp deleted file mode 100644 index a08ba8a6ec..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonFloorFloatWorkload.cpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonFloorFloatWorkload.hpp" - -namespace armnn -{ -NeonFloorFloatWorkload::NeonFloorFloatWorkload(const FloorQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload<FloorQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("NeonFloorFloatWorkload", 1, 1); - - arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_Layer.configure(&input, &output); -} - -void NeonFloorFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFloorFloatWorkload_Execute"); - m_Layer.run(); -} -} //namespace armnn - - - diff --git a/src/armnn/backends/NeonWorkloads/NeonFloorFloatWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonFloorFloatWorkload.hpp deleted file mode 100644 index ad9f44bbf9..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonFloorFloatWorkload.hpp +++ /dev/null @@ -1,27 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/NeonWorkloadUtils.hpp> - -namespace armnn -{ - -class NeonFloorFloatWorkload : public FloatWorkload<FloorQueueDescriptor> -{ -public: - NeonFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable arm_compute::NEFloor m_Layer; -}; - -} //namespace armnn - - - - diff --git a/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.cpp deleted file mode 100644 index 2036ecb203..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.cpp +++ /dev/null @@ -1,96 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "NeonFullyConnectedFloatWorkload.hpp" - -#include "backends/ArmComputeTensorUtils.hpp" -#include "backends/ArmComputeUtils.hpp" -#include "backends/CpuTensorHandle.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& weights, - const TensorInfo& biases, - const FullyConnectedDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output); - const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights); - - arm_compute::TensorInfo aclBiases; - arm_compute::TensorInfo *optionalAclBiases = nullptr; - if (descriptor.m_BiasEnabled) - { - aclBiases = BuildArmComputeTensorInfo(biases); - optionalAclBiases = &aclBiases; - } - - const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo = - ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor); - - - return arm_compute::NEFullyConnectedLayer::validate(&aclInput, - &aclWeights, - optionalAclBiases, - &aclOutput, - fullyConnectedLayerInfo); -} - -NeonFullyConnectedFloatWorkload::NeonFullyConnectedFloatWorkload(const FullyConnectedQueueDescriptor& descriptor, - const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) - : FloatWorkload<FullyConnectedQueueDescriptor>(descriptor, info) - , m_FullyConnectedLayer(memoryManager) -{ - m_Data.ValidateInputsOutputs("NeonFullyConnectedFloatWorkload", 1, 1); - - arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_WeightsTensor = std::make_unique<arm_compute::Tensor>(); - BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo()); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - m_BiasesTensor = std::make_unique<arm_compute::Tensor>(); - BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo()); - } - - // Construct - arm_compute::FullyConnectedLayerInfo fc_info; - fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix; - m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info); - - // Allocate - InitializeArmComputeTensorDataForFloatTypes(*m_WeightsTensor, m_Data.m_Weight); - - if (m_BiasesTensor) - { - InitializeArmComputeTensorDataForFloatTypes(*m_BiasesTensor, m_Data.m_Bias); - } - - // Force Compute Library to perform the necessary copying and reshaping, after which - // delete all the input tensors that will no longer be needed - m_FullyConnectedLayer.prepare(); - FreeUnusedTensors(); -} - -void NeonFullyConnectedFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFullyConnectedFloatWorkload_Execute"); - m_FullyConnectedLayer.run(); -} - -void NeonFullyConnectedFloatWorkload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_WeightsTensor); - FreeTensorIfUnused(m_BiasesTensor); -} - -} //namespace armnn - diff --git a/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp deleted file mode 100644 index 27e5717b04..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp +++ /dev/null @@ -1,40 +0,0 @@ -// -// Copyright © 2017 
Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/NeonWorkloadUtils.hpp> - -#include "arm_compute/runtime/MemoryManagerOnDemand.h" - -#include <memory> - -namespace armnn -{ - -arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& weights, - const TensorInfo& biases, - const FullyConnectedDescriptor& descriptor); - -class NeonFullyConnectedFloatWorkload : public FloatWorkload<FullyConnectedQueueDescriptor> -{ -public: - NeonFullyConnectedFloatWorkload(const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); - virtual void Execute() const override; - -private: - mutable arm_compute::NEFullyConnectedLayer m_FullyConnectedLayer; - - std::unique_ptr<arm_compute::Tensor> m_WeightsTensor; - std::unique_ptr<arm_compute::Tensor> m_BiasesTensor; - - void FreeUnusedTensors(); -}; - -} //namespace armnn - diff --git a/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.cpp deleted file mode 100644 index 7296e67179..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonL2NormalizationFloatWorkload.hpp" -#include "backends/ArmComputeUtils.hpp" - -namespace armnn -{ - -arm_compute::Status NeonL2NormalizationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - arm_compute::NormalizationLayerInfo normalizationInfo = - CreateAclNormalizationLayerInfoForL2Normalization(input); - - return arm_compute::NENormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo); -} - -NeonL2NormalizationFloatWorkload::NeonL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) - : FloatWorkload<L2NormalizationQueueDescriptor>(descriptor, info) - , m_Layer(memoryManager) -{ - m_Data.ValidateInputsOutputs("NeonL2NormalizationFloatWorkload", 1, 1); - - arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_Layer.configure(&input, &output, CreateAclNormalizationLayerInfoForL2Normalization(info.m_InputTensorInfos[0])); -} - -void NeonL2NormalizationFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonL2NormalizationFloatWorkload_Execute"); - m_Layer.run(); -} - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.hpp deleted file mode 100644 index 078c4d140f..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.hpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/NeonWorkloadUtils.hpp> - -#include "arm_compute/runtime/MemoryManagerOnDemand.h" - -#include <memory> - -namespace armnn -{ - -arm_compute::Status NeonL2NormalizationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output); - -class NeonL2NormalizationFloatWorkload : public FloatWorkload<L2NormalizationQueueDescriptor> -{ -public: - NeonL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); - virtual void Execute() const override; - -private: - // Purposely not a NEL2Normalize function. See constructor. - mutable arm_compute::NENormalizationLayer m_Layer; -}; - -} //namespace armnn - diff --git a/src/armnn/backends/NeonWorkloads/NeonLstmFloatWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonLstmFloatWorkload.cpp deleted file mode 100644 index 8b2b58d9b1..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonLstmFloatWorkload.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonLstmFloatWorkload.hpp" - -namespace armnn -{ -NeonLstmFloatWorkload::NeonLstmFloatWorkload(const LstmQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload<LstmQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("NeonLstmFloatWorkload", 1, 1); -} - -void NeonLstmFloatWorkload::Execute() const -{ - throw armnn::Exception("No implementation of Lstm in the Neon backend!"); -} - -} // namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonLstmFloatWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonLstmFloatWorkload.hpp deleted file mode 100644 index 6064a017f9..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonLstmFloatWorkload.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/NeonWorkloadUtils.hpp> - -namespace armnn -{ - -class NeonLstmFloatWorkload : public FloatWorkload<LstmQueueDescriptor> -{ -public: - NeonLstmFloatWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonMergerFloatWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonMergerFloatWorkload.cpp deleted file mode 100644 index 79039aa51a..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonMergerFloatWorkload.cpp +++ /dev/null @@ -1,17 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonMergerFloatWorkload.hpp" - -namespace armnn -{ - -void NeonMergerFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMergerFloatWorkload_Execute"); - NeonBaseMergerWorkload::Execute(); -} - -} // namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonMergerFloatWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonMergerFloatWorkload.hpp deleted file mode 100644 index e7088b8c2f..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonMergerFloatWorkload.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "NeonBaseMergerWorkload.hpp" - -namespace armnn -{ - -class NeonMergerFloatWorkload : public NeonBaseMergerWorkload<DataType::Float16, DataType::Float32> -{ -public: - using NeonBaseMergerWorkload<DataType::Float16, DataType::Float32>::NeonBaseMergerWorkload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.cpp deleted file mode 100644 index 3989702bd3..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.cpp +++ /dev/null @@ -1,17 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonMergerUint8Workload.hpp" - -namespace armnn -{ - -void NeonMergerUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMergerUint8Workload_Execute"); - NeonBaseMergerWorkload::Execute(); -} - -} // namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.hpp deleted file mode 100644 index 73c0fd55ad..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "NeonBaseMergerWorkload.hpp" - -namespace armnn -{ - -class NeonMergerUint8Workload : public NeonBaseMergerWorkload<DataType::QuantisedAsymm8> -{ -public: - using NeonBaseMergerWorkload<DataType::QuantisedAsymm8>::NeonBaseMergerWorkload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.cpp deleted file mode 100644 index c4241ece19..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.cpp +++ /dev/null @@ -1,60 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonMultiplicationFloatWorkload.hpp" - - -namespace armnn -{ - -arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); - const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); - const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it, - // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be - // ignored for F32 tensors. 
- return arm_compute::NEPixelWiseMultiplication::validate(&aclInput1, - &aclInput2, - &aclOutput, - 1.0f, - arm_compute::ConvertPolicy::SATURATE, - arm_compute::RoundingPolicy::TO_ZERO); -} - -NeonMultiplicationFloatWorkload::NeonMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload<MultiplicationQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("NeonMultiplicationFloatWorkload", 2, 1); - - arm_compute::ITensor& input1 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& input2 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); - arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it, - // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be - // ignored for F32 tensors. - m_PixelWiseMultiplication.configure(&input1, - &input2, - &output, - 1.0f, - arm_compute::ConvertPolicy::SATURATE, - arm_compute::RoundingPolicy::TO_ZERO); -} - -void NeonMultiplicationFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMultiplicationFloatWorkload_Execute"); - m_PixelWiseMultiplication.run(); -} - -} //namespace armnn - - diff --git a/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.hpp deleted file mode 100644 index 4b187b2d42..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.hpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/NeonWorkloadUtils.hpp> - -namespace armnn -{ -arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output); - -class NeonMultiplicationFloatWorkload : public FloatWorkload<MultiplicationQueueDescriptor> -{ -public: - NeonMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable arm_compute::NEPixelWiseMultiplication m_PixelWiseMultiplication; -}; - -} //namespace armnn - - - - diff --git a/src/armnn/backends/NeonWorkloads/NeonNormalizationFloatWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloatWorkload.cpp deleted file mode 100644 index 4534c376d8..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonNormalizationFloatWorkload.cpp +++ /dev/null @@ -1,70 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "NeonNormalizationFloatWorkload.hpp" -#include "backends/NeonLayerSupport.hpp" -#include "backends/ArmComputeUtils.hpp" -#include "backends/ArmComputeTensorUtils.hpp" - -namespace armnn -{ - -arm_compute::Status NeonNormalizationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const NormalizationDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - arm_compute::NormalizationLayerInfo normalizationInfo = - armcomputetensorutils::BuildArmComputeNormalizationLayerInfo(descriptor); - - return arm_compute::NENormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo); -} - -NeonNormalizationFloatWorkload::NeonNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) - : FloatWorkload<NormalizationQueueDescriptor>(descriptor, info) - , m_NormalizationLayer(memoryManager) -{ - m_Data.ValidateInputsOutputs("NeonNormalizationFloatWorkload", 1, 1); - std::string reasonIfUnsupported; - if (!IsNeonNormalizationDescParamsSupported(&reasonIfUnsupported, m_Data.m_Parameters)) - { - throw UnimplementedException(reasonIfUnsupported); - } - - // Input and output tensors have to have the same dimensionality. - if (info.m_InputTensorInfos[0].GetShape()[1] != info.m_OutputTensorInfos[0].GetShape()[1] - || info.m_InputTensorInfos[0].GetShape()[0] != info.m_OutputTensorInfos[0].GetShape()[0] - || info.m_InputTensorInfos[0].GetShape()[3] != info.m_OutputTensorInfos[0].GetShape()[3] - || info.m_InputTensorInfos[0].GetShape()[2] != info.m_OutputTensorInfos[0].GetShape()[2]) - { - throw InvalidArgumentException("Normalization requires input and output tensors to have equal dimensionality."); - } - - arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - const arm_compute::NormType normType = - ConvertNormalizationAlgorithmChannelToAclNormType(m_Data.m_Parameters.m_NormChannelType); - arm_compute::NormalizationLayerInfo normalizationInfo(normType, - m_Data.m_Parameters.m_NormSize, - m_Data.m_Parameters.m_Alpha, - m_Data.m_Parameters.m_Beta, - m_Data.m_Parameters.m_K, - false); - - m_NormalizationLayer.configure(&input, &output, normalizationInfo); -} - -void NeonNormalizationFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonNormalizationFloatWorkload_Execute"); - m_NormalizationLayer.run(); -} - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonNormalizationFloatWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloatWorkload.hpp deleted file mode 100644 index 633944ddc9..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonNormalizationFloatWorkload.hpp +++ /dev/null @@ -1,34 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/NeonWorkloadUtils.hpp> - -#include "arm_compute/runtime/MemoryManagerOnDemand.h" - -namespace armnn -{ - -arm_compute::Status NeonNormalizationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const NormalizationDescriptor& descriptor); - -class NeonNormalizationFloatWorkload : public FloatWorkload<NormalizationQueueDescriptor> -{ -public: - NeonNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); - virtual void Execute() const override; - -private: - mutable arm_compute::NENormalizationLayer m_NormalizationLayer; -}; - -} //namespace armnn - - - - diff --git a/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.cpp deleted file mode 100644 index 9bc76ba853..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonPermuteWorkload.hpp" -#include "backends/NeonTensorHandle.hpp" -#include "backends/ArmComputeTensorUtils.hpp" - -#include <arm_compute/core/Error.h> - -namespace armnn -{ - -arm_compute::Status NeonPermuteWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const PermuteDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); - const armnn::PermutationVector& mappings = descriptor.m_DimMappings; - - return arm_compute::NEPermute::validate(&aclInputInfo, &aclOutputInfo, - armcomputetensorutils::BuildArmComputePermutationVector(mappings)); -} - -template <armnn::DataType... DataTypes> -NeonPermuteWorkload<DataTypes...>::NeonPermuteWorkload(const PermuteQueueDescriptor& descriptor, - const WorkloadInfo& info) - : TypedWorkload<PermuteQueueDescriptor, DataTypes...>(descriptor, info) -{ - using armcomputetensorutils::BuildArmComputePermutationVector; - - m_Data.ValidateInputsOutputs(GetName(), 1, 1); - - const arm_compute::ITensor& input = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings; - - // Run the layer. - m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings)); -} - -template <armnn::DataType... DataTypes> -void NeonPermuteWorkload<DataTypes...>::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON(GetName() + "_Execute"); - m_PermuteFunction.run(); -} - -template class NeonPermuteWorkload<DataType::Float16, DataType::Float32>; -template class NeonPermuteWorkload<DataType::QuantisedAsymm8>; - -} // namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.hpp deleted file mode 100644 index 1fe05b1645..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.hpp +++ /dev/null @@ -1,43 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" -#include "backends/NeonWorkloadUtils.hpp" - -#include <armnn/TypesUtils.hpp> -#include <arm_compute/runtime/NEON/functions/NEPermute.h> - -#include <string> - -namespace armnn -{ -arm_compute::Status NeonPermuteWorkloadValidate(const TensorInfo& input, const TensorInfo& output, - const PermuteDescriptor& descriptor); - -template <armnn::DataType... DataTypes> -class NeonPermuteWorkload : public TypedWorkload<PermuteQueueDescriptor, DataTypes...> -{ -public: - static const std::string& GetName() - { - static const std::string name = std::string("NeonPermuteWorkload"); - return name; - } - - NeonPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - using TypedWorkload<PermuteQueueDescriptor, DataTypes...>::m_Data; - mutable arm_compute::NEPermute m_PermuteFunction; -}; - -using NeonPermuteFloatWorkload = NeonPermuteWorkload<DataType::Float16, DataType::Float32>; -using NeonPermuteUint8Workload = NeonPermuteWorkload<DataType::QuantisedAsymm8>; - -} // namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp deleted file mode 100644 index 208d08c4c5..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonPooling2dBaseWorkload.hpp" -#include "backends/NeonLayerSupport.hpp" -#include "backends/NeonTensorHandle.hpp" -#include "backends/ArmComputeUtils.hpp" -#include "backends/ArmComputeTensorUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status NeonPooling2dWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const Pooling2dDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor); - - return arm_compute::NEPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo); -} - -template <armnn::DataType... 
dataTypes> -NeonPooling2dBaseWorkload<dataTypes...>::NeonPooling2dBaseWorkload( - const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, const std::string& name) - : TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>(descriptor, info) -{ - m_Data.ValidateInputsOutputs(name, 1, 1); - - arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters); - - m_PoolingLayer.configure(&input, &output, layerInfo); -} - -template class NeonPooling2dBaseWorkload<DataType::Float16, DataType::Float32>; -template class NeonPooling2dBaseWorkload<DataType::QuantisedAsymm8>; - -} //namespace armnn - diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp deleted file mode 100644 index 77d6bf2f06..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp +++ /dev/null @@ -1,37 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/NeonWorkloadUtils.hpp> - -namespace armnn -{ - -arm_compute::Status NeonPooling2dWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const Pooling2dDescriptor& descriptor); - -// Base class template providing an implementation of the Pooling2d layer common to all data types. -template <armnn::DataType... dataTypes> -class NeonPooling2dBaseWorkload : public TypedWorkload<Pooling2dQueueDescriptor, dataTypes...> -{ -public: - using TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>::m_Data; - - NeonPooling2dBaseWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, - const std::string& name); - -protected: - mutable arm_compute::NEPoolingLayer m_PoolingLayer; -}; - - -} //namespace armnn - - - - - diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dFloatWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dFloatWorkload.cpp deleted file mode 100644 index 46996b088c..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonPooling2dFloatWorkload.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonPooling2dFloatWorkload.hpp" - - - -namespace armnn -{ - -NeonPooling2dFloatWorkload::NeonPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info) - : NeonPooling2dBaseWorkload<armnn::DataType::Float16, armnn::DataType::Float32>(descriptor, info, - "NeonPooling2dFloatWorkload") -{ -} - -void NeonPooling2dFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPooling2dFloatWorkload_Execute"); - m_PoolingLayer.run(); -} - -} //namespace armnn - diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dFloatWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dFloatWorkload.hpp deleted file mode 100644 index 78a35748bb..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonPooling2dFloatWorkload.hpp +++ /dev/null @@ -1,25 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/NeonWorkloadUtils.hpp> -#include "NeonPooling2dBaseWorkload.hpp" - -namespace armnn -{ - -class NeonPooling2dFloatWorkload : public NeonPooling2dBaseWorkload<armnn::DataType::Float16, - armnn::DataType::Float32> -{ -public: - NeonPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; -}; - -} //namespace armnn - - - diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp deleted file mode 100644 index 8f99a2be86..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonPooling2dUint8Workload.hpp" - - - -namespace armnn -{ - -NeonPooling2dUint8Workload::NeonPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info) - : NeonPooling2dBaseWorkload<armnn::DataType::QuantisedAsymm8>(descriptor, info, "NeonPooling2dUint8Workload") -{ -} - -void NeonPooling2dUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPooling2dUint8Workload_Execute"); - m_PoolingLayer.run(); -} - -} //namespace armnn - diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.hpp deleted file mode 100644 index d475c5f721..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.hpp +++ /dev/null @@ -1,25 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <armnn/Types.hpp> -#include "NeonPooling2dBaseWorkload.hpp" - -namespace armnn -{ - -class NeonPooling2dUint8Workload : public NeonPooling2dBaseWorkload<armnn::DataType::QuantisedAsymm8> -{ -public: - NeonPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; -}; - -} //namespace armnn - - - - diff --git a/src/armnn/backends/NeonWorkloads/NeonReshapeFloatWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonReshapeFloatWorkload.cpp deleted file mode 100644 index 2dae9466bb..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonReshapeFloatWorkload.cpp +++ /dev/null @@ -1,32 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "NeonReshapeFloatWorkload.hpp" - - - -namespace armnn -{ - -NeonReshapeFloatWorkload::NeonReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload<ReshapeQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("NeonReshapeFloatWorkload", 1, 1); - - arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_Layer.configure(&input, &output); -} - -void NeonReshapeFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReshapeFloatWorkload_Execute"); - m_Layer.run(); -} - -} //namespace armnn - diff --git a/src/armnn/backends/NeonWorkloads/NeonReshapeFloatWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonReshapeFloatWorkload.hpp deleted file mode 100644 index 066765adeb..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonReshapeFloatWorkload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/NeonWorkloadUtils.hpp> - -namespace armnn -{ - -class NeonReshapeFloatWorkload : public FloatWorkload<ReshapeQueueDescriptor> -{ -public: - NeonReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); - - virtual void Execute() const override; - -private: - mutable arm_compute::NEReshapeLayer m_Layer; -}; - -} //namespace armnn - - - - - diff --git a/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp deleted file mode 100644 index 41aa07fe49..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonReshapeUint8Workload.hpp" - - - - -namespace armnn -{ -NeonReshapeUint8Workload::NeonReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, - const WorkloadInfo& info) - : Uint8Workload<ReshapeQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("NeonReshapeUint8Workload", 1, 1); - - arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_Layer.configure(&input, &output); -} - -void NeonReshapeUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReshapeUint8Workload_Execute"); - m_Layer.run(); -} -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.hpp deleted file mode 100644 index 3f7c470323..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.hpp +++ /dev/null @@ -1,27 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/NeonWorkloadUtils.hpp> - -namespace armnn -{ - -class NeonReshapeUint8Workload : public Uint8Workload<ReshapeQueueDescriptor> -{ -public: - NeonReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable arm_compute::NEReshapeLayer m_Layer; -}; - -} //namespace armnn - - - - diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.cpp deleted file mode 100644 index ca9e4f058d..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.cpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonSoftmaxBaseWorkload.hpp" - -#include "backends/ArmComputeTensorUtils.hpp" - -namespace armnn -{ - -arm_compute::Status NeonSoftmaxWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const SoftmaxDescriptor& descriptor) -{ - // NOTE: We report 4D Softmax as unsupported until full support is added to ACL - if(input.GetShape().GetNumDimensions() >= 4u) - { - return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "4d softmax is not supported"); - } - - const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - return arm_compute::NESoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta); -} - -} //namespace armnn - diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.hpp deleted file mode 100644 index 24910df7c7..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.hpp +++ /dev/null @@ -1,17 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/NeonWorkloadUtils.hpp" - -namespace armnn -{ - -arm_compute::Status NeonSoftmaxWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const SoftmaxDescriptor& descriptor); - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.cpp deleted file mode 100644 index 92e5139c1a..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.cpp +++ /dev/null @@ -1,32 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonSoftmaxFloatWorkload.hpp" - -namespace armnn -{ - -NeonSoftmaxFloatWorkload::NeonSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, - const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) - : FloatWorkload<SoftmaxQueueDescriptor>(descriptor, info) - , m_SoftmaxLayer(memoryManager) -{ - m_Data.ValidateInputsOutputs("NeonSoftmaxFloatWorkload", 1, 1); - - // The ArmCompute softmax layer uses 2D input/output tensors, so flatten the first three dimensions. 
- arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta); -} - -void NeonSoftmaxFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSoftmaxFloatWorkload_Execute"); - m_SoftmaxLayer.run(); -} - -} //namespace armnn - diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.hpp deleted file mode 100644 index 47745c658f..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/NeonWorkloadUtils.hpp> - -#include "arm_compute/runtime/MemoryManagerOnDemand.h" - -#include <memory> - -namespace armnn -{ - -class NeonSoftmaxFloatWorkload : public FloatWorkload<SoftmaxQueueDescriptor> -{ -public: - NeonSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); - virtual void Execute() const override; - -private: - mutable arm_compute::NESoftmaxLayer m_SoftmaxLayer; -}; - -} //namespace armnn - diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp deleted file mode 100644 index cff869c9b7..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonSoftmaxUint8Workload.hpp" - -namespace armnn -{ - -NeonSoftmaxUint8Workload::NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, - const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) - : Uint8Workload<SoftmaxQueueDescriptor>(descriptor, info) - , m_SoftmaxLayer(memoryManager) -{ - m_Data.ValidateInputsOutputs("NeonSoftmaxUint8Workload", 1, 1); - - arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - const auto outputQuantization = output.info()->quantization_info(); - - if ((outputQuantization.scale != (1.0f / 256.0f)) || (outputQuantization.offset != 0)) - { - throw InvalidArgumentException( - "Invalid quantization for output. Only scale = 1.0f / 256.0f and offset = 0 supported"); - } - - m_SoftmaxLayer.configure(&input, &output, descriptor.m_Parameters.m_Beta); -} - -void NeonSoftmaxUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSoftmaxUint8Workload_Execute"); - - m_SoftmaxLayer.run(); -} - -} //namespace armnn - diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp deleted file mode 100644 index f894c5a958..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp +++ /dev/null @@ -1,27 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/NeonWorkloadUtils.hpp> - -#include "arm_compute/runtime/MemoryManagerOnDemand.h" - -namespace armnn -{ - -class NeonSoftmaxUint8Workload : public Uint8Workload<SoftmaxQueueDescriptor> -{ -public: - NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); - virtual void Execute() const override; - -private: - mutable arm_compute::NESoftmaxLayer m_SoftmaxLayer; -}; - -} //namespace armnn - diff --git a/src/armnn/backends/NeonWorkloads/NeonSplitterFloatWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonSplitterFloatWorkload.cpp deleted file mode 100644 index 39ed5b7cbc..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonSplitterFloatWorkload.cpp +++ /dev/null @@ -1,17 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonSplitterFloatWorkload.hpp" - -namespace armnn -{ - -void NeonSplitterFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSplitterFloatWorkload_Execute"); - NeonBaseSplitterWorkload::Execute(); -} - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonSplitterFloatWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonSplitterFloatWorkload.hpp deleted file mode 100644 index 744a4fe216..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonSplitterFloatWorkload.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "NeonBaseSplitterWorkload.hpp" - -namespace armnn -{ - -class NeonSplitterFloatWorkload : public NeonBaseSplitterWorkload<DataType::Float16, DataType::Float32> -{ -public: - using NeonBaseSplitterWorkload<DataType::Float16, DataType::Float32>::NeonBaseSplitterWorkload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp deleted file mode 100644 index 4b2cf8fc91..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp +++ /dev/null @@ -1,17 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonSplitterUint8Workload.hpp" - -namespace armnn -{ - -void NeonSplitterUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSplitterUint8Workload_Execute"); - NeonBaseSplitterWorkload::Execute(); -} - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.hpp deleted file mode 100644 index f219cfaa7d..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "NeonBaseSplitterWorkload.hpp" - -namespace armnn -{ - -class NeonSplitterUint8Workload : public NeonBaseSplitterWorkload<DataType::QuantisedAsymm8> -{ -public: - using NeonBaseSplitterWorkload<DataType::QuantisedAsymm8>::NeonBaseSplitterWorkload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonSubtractionFloatWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonSubtractionFloatWorkload.cpp deleted file mode 100644 index 3f37d82d22..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonSubtractionFloatWorkload.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonSubtractionFloatWorkload.hpp" -#include "backends/ArmComputeTensorUtils.hpp" -#include "backends/CpuTensorHandle.hpp" - -namespace armnn -{ - -arm_compute::Status NeonSubtractionWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); - const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); - const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - return arm_compute::NEArithmeticSubtraction::validate(&aclInput0, - &aclInput1, - &aclOutput, - arm_compute::ConvertPolicy::SATURATE); -} - -NeonSubtractionFloatWorkload::NeonSubtractionFloatWorkload(const SubtractionQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload<SubtractionQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("NeonSubtractionFloatWorkload", 2, 1); - - arm_compute::ITensor& input1 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& input2 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); - arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_SubLayer.configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE); -} - -void NeonSubtractionFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSubtractionFloatWorkload_Execute"); - m_SubLayer.run(); -} - -} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonSubtractionFloatWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonSubtractionFloatWorkload.hpp deleted file mode 100644 index 18988a35ca..0000000000 --- a/src/armnn/backends/NeonWorkloads/NeonSubtractionFloatWorkload.hpp +++ /dev/null @@ -1,27 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/NeonWorkloadUtils.hpp> - -namespace armnn -{ - -arm_compute::Status NeonSubtractionWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output); - -class NeonSubtractionFloatWorkload : public FloatWorkload<SubtractionQueueDescriptor> -{ -public: - NeonSubtractionFloatWorkload(const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable arm_compute::NEArithmeticSubtraction m_SubLayer; -}; - -} //namespace armnn diff --git a/src/armnn/backends/OutputHandler.cpp b/src/armnn/backends/OutputHandler.cpp deleted file mode 100644 index c1be5b7dc4..0000000000 --- a/src/armnn/backends/OutputHandler.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#include "OutputHandler.hpp" - -#include <boost/assert.hpp> -#include <boost/log/trivial.hpp> - -#include "backends/WorkloadFactory.hpp" -#include "backends/WorkloadDataCollector.hpp" -#include "backends/ITensorHandle.hpp" - -namespace armnn -{ - -void OutputHandler::SetTensorInfo(const TensorInfo& tensorInfo) -{ - m_TensorInfo = tensorInfo; - m_bTensorInfoSet = true; -} - -void OutputHandler::CreateTensorHandles(const IWorkloadFactory& factory) -{ - m_TensorHandle = factory.CreateTensorHandle(m_TensorInfo); -} - -void OutputHandler::CollectWorkloadOutputs(WorkloadDataCollector& dataCollector) const -{ - dataCollector.Push(m_TensorHandle.get(), m_TensorInfo); -} - -} // namespace armnn diff --git a/src/armnn/backends/OutputHandler.hpp b/src/armnn/backends/OutputHandler.hpp deleted file mode 100644 index dfc01844c9..0000000000 --- a/src/armnn/backends/OutputHandler.hpp +++ /dev/null @@ -1,63 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include "backends/WorkloadDataFwd.hpp" - -#include <string> -#include <vector> - -#include <memory> -#include <set> - -#include <boost/assert.hpp> - -#include "armnn/INetwork.hpp" -#include "armnn/Types.hpp" -#include "armnn/Descriptors.hpp" -#include "armnn/Tensor.hpp" -#include "ITensorHandle.hpp" - -namespace armnn -{ - -class ITensorHandle; -class IWorkloadFactory; -class OutputSlot; -class WorkloadDataCollector; - -class OutputHandler -{ -public: - /// @brief - Sets the TensorInfo used by this output handler. - /// @param tensorInfo - TensorInfo for the output. - void SetTensorInfo(const TensorInfo& tensorInfo); - - /// @brief - Creates tensor handlers used by the intermediate tensors. Does not allocate memory. - /// @param factory - Factory to be used for handler creation. - void CreateTensorHandles(const IWorkloadFactory& factory); - - /// @brief - Gets the matching TensorInfo for the output. - /// @return - References to the output TensorInfo. - const TensorInfo& GetTensorInfo() const { return m_TensorInfo; } - - /// @brief - Gets the allocated tensor memory. - /// @return - Pointer to the tensor memory. - ITensorHandle* GetData() const { return m_TensorHandle.get(); } - - /// Fill the outputs for a given queue descriptor. - void CollectWorkloadOutputs(WorkloadDataCollector& dataCollector) const; - - void SetData(std::unique_ptr<ITensorHandle> data) { m_TensorHandle = std::move(data); } - - /// @brief Returns true if SetTensorInfo() has been called at least once on this. 
- bool IsTensorInfoSet() const { return m_bTensorInfoSet; } -private: - std::unique_ptr<ITensorHandle> m_TensorHandle; - TensorInfo m_TensorInfo; - bool m_bTensorInfoSet = false; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefLayerSupport.cpp b/src/armnn/backends/RefLayerSupport.cpp deleted file mode 100644 index d56cdebeda..0000000000 --- a/src/armnn/backends/RefLayerSupport.cpp +++ /dev/null @@ -1,398 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "LayerSupportCommon.hpp" -#include "RefLayerSupport.hpp" -#include <armnn/Descriptors.hpp> -#include <armnn/Types.hpp> -#include <armnn/Tensor.hpp> - -#include <boost/core/ignore_unused.hpp> -#include "InternalTypes.hpp" - -using namespace boost; - -namespace armnn -{ - -template<typename Float32Func, typename Uint8Func, typename ... Params> -bool IsSupportedForDataTypeRef(std::string* reasonIfUnsupported, - DataType dataType, - Float32Func floatFuncPtr, - Uint8Func uint8FuncPtr, - Params&&... params) -{ - return IsSupportedForDataTypeGeneric(reasonIfUnsupported, - dataType, - &FalseFunc<Params...>, - floatFuncPtr, - uint8FuncPtr, - std::forward<Params>(params)...); -} - -bool IsActivationSupportedRef(const TensorInfo& input, - const TensorInfo& output, - const ActivationDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(output); - ignore_unused(descriptor); - return IsSupportedForDataTypeRef(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsAdditionSupportedRef(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - ignore_unused(input1); - ignore_unused(output); - return IsSupportedForDataTypeRef(reasonIfUnsupported, - input0.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsBatchNormalizationSupportedRef(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& mean, - const TensorInfo& var, - const TensorInfo& beta, - const TensorInfo& gamma, - const BatchNormalizationDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(descriptor); - return IsSupportedForDataTypeRef(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsConstantSupportedRef(const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - return IsSupportedForDataTypeRef(reasonIfUnsupported, - output.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsConvolution2dSupportedRef(const TensorInfo& input, - const TensorInfo& output, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional<TensorInfo>& biases, - std::string* reasonIfUnsupported) -{ - ignore_unused(descriptor); - ignore_unused(output); - ignore_unused(weights); - ignore_unused(biases); - return IsSupportedForDataTypeRef(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsDepthwiseConvolutionSupportedRef(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional<TensorInfo>& biases, - std::string* reasonIfUnsupported) -{ - ignore_unused(output); - ignore_unused(descriptor); - ignore_unused(weights); - ignore_unused(biases); - return IsSupportedForDataTypeRef(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsDivisionSupportedRef(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& 
output, - std::string* reasonIfUnsupported) -{ - ignore_unused(input1); - ignore_unused(output); - return IsSupportedForDataTypeRef(reasonIfUnsupported, - input0.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsSubtractionSupportedRef(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - ignore_unused(input1); - ignore_unused(output); - return IsSupportedForDataTypeRef(reasonIfUnsupported, - input0.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsFullyConnectedSupportedRef(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& weights, - const TensorInfo& biases, - const FullyConnectedDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(output); - ignore_unused(descriptor); - ignore_unused(weights); - ignore_unused(biases); - return IsSupportedForDataTypeRef(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsInputSupportedRef(const TensorInfo& input, - std::string* reasonIfUnsupported) -{ - return IsSupportedForDataTypeRef(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsL2NormalizationSupportedRef(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - ignore_unused(output); - return IsSupportedForDataTypeRef(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &FalseFuncU8<>); -} - -bool IsMergerSupportedRef(const std::vector<const TensorInfo*> inputs, - const OriginsDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(descriptor); - return IsSupportedForDataTypeRef(reasonIfUnsupported, - inputs[0]->GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsMultiplicationSupportedRef(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - ignore_unused(input1); - ignore_unused(output); - return IsSupportedForDataTypeRef(reasonIfUnsupported, - input0.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsNormalizationSupportedRef(const TensorInfo& input, - const TensorInfo& output, - const NormalizationDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(descriptor); - return IsSupportedForDataTypeRef(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &FalseFuncU8<>); -} - -bool IsOutputSupportedRef(const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - return IsSupportedForDataTypeRef(reasonIfUnsupported, - output.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsPermuteSupportedRef(const TensorInfo& input, - const TensorInfo& output, - const PermuteDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(descriptor); - return IsSupportedForDataTypeRef(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsPooling2dSupportedRef(const TensorInfo& input, - const TensorInfo& output, - const Pooling2dDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(descriptor); - return IsSupportedForDataTypeRef(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsResizeBilinearSupportedRef(const TensorInfo& input, - std::string* reasonIfUnsupported) -{ - return IsSupportedForDataTypeRef(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsSoftmaxSupportedRef(const TensorInfo& input, - const TensorInfo& output, - const SoftmaxDescriptor& descriptor, - 
std::string* reasonIfUnsupported) -{ - ignore_unused(output); - ignore_unused(descriptor); - return IsSupportedForDataTypeRef(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsSplitterSupportedRef(const TensorInfo& input, - const ViewsDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(descriptor); - return IsSupportedForDataTypeRef(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsFakeQuantizationSupportedRef(const TensorInfo& input, - const FakeQuantizationDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(descriptor); - return IsSupportedForDataTypeRef(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &FalseFuncU8<>); -} - -bool IsReshapeSupportedRef(const TensorInfo& input, - std::string* reasonIfUnsupported) -{ - return IsSupportedForDataTypeRef(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsFloorSupportedRef(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - ignore_unused(output); - return IsSupportedForDataTypeRef(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &FalseFuncU8<>); -} - -bool IsLstmSupportedRef(const TensorInfo& input, const TensorInfo& outputStateIn, - const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, - const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, - const TensorInfo& output, const LstmDescriptor& descriptor, - const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, - const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights, - const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, - const TensorInfo& forgetGateBias, const TensorInfo& cellBias, - const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, - const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, - const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, - const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, - const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported) -{ - ignore_unused(input); - ignore_unused(outputStateIn); - ignore_unused(cellStateIn); - ignore_unused(scratchBuffer); - ignore_unused(outputStateOut); - ignore_unused(cellStateOut); - ignore_unused(output); - ignore_unused(descriptor); - ignore_unused(inputToForgetWeights); - ignore_unused(inputToCellWeights); - ignore_unused(inputToOutputWeights); - ignore_unused(recurrentToForgetWeights); - ignore_unused(recurrentToCellWeights); - ignore_unused(recurrentToOutputWeights); - ignore_unused(forgetGateBias); - ignore_unused(cellBias); - ignore_unused(outputGateBias); - ignore_unused(inputToInputWeights); - ignore_unused(recurrentToInputWeights); - ignore_unused(cellToInputWeights); - ignore_unused(inputGateBias); - ignore_unused(projectionWeights); - ignore_unused(projectionBias); - ignore_unused(cellToForgetWeights); - ignore_unused(cellToOutputWeights); - return false; -} - -bool IsConvertFp16ToFp32SupportedRef(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - return (IsSupportedForDataTypeGeneric(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &FalseInputFuncF32<>, - &FalseFuncU8<>) && - IsSupportedForDataTypeGeneric(reasonIfUnsupported, - output.GetDataType(), - &FalseOutputFuncF16<>, - &TrueFunc<>, - &FalseFuncU8<>)); -} - -bool 
IsConvertFp32ToFp16SupportedRef(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - return (IsSupportedForDataTypeGeneric(reasonIfUnsupported, - input.GetDataType(), - &FalseInputFuncF16<>, - &TrueFunc<>, - &FalseFuncU8<>) && - IsSupportedForDataTypeGeneric(reasonIfUnsupported, - output.GetDataType(), - &TrueFunc<>, - &FalseOutputFuncF32<>, - &FalseFuncU8<>)); -} - -bool IsMeanSupportedRef(const TensorInfo& input, - const TensorInfo& output, - const MeanDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - return false; -} - -} diff --git a/src/armnn/backends/RefLayerSupport.hpp b/src/armnn/backends/RefLayerSupport.hpp deleted file mode 100644 index ff2e7e387f..0000000000 --- a/src/armnn/backends/RefLayerSupport.hpp +++ /dev/null @@ -1,155 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include <armnn/DescriptorsFwd.hpp> -#include <armnn/Types.hpp> -#include <armnn/Tensor.hpp> -#include <layers/LstmLayer.hpp> -#include <boost/optional.hpp> - -#include <boost/optional.hpp> - -namespace armnn -{ - -bool IsActivationSupportedRef(const TensorInfo& input, - const TensorInfo& output, - const ActivationDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsAdditionSupportedRef(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsBatchNormalizationSupportedRef(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& mean, - const TensorInfo& var, - const TensorInfo& beta, - const TensorInfo& gamma, - const BatchNormalizationDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsConstantSupportedRef(const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsConvolution2dSupportedRef(const TensorInfo& input, - const TensorInfo& output, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional<TensorInfo>& biases, - std::string* reasonIfUnsupported = nullptr); - -bool IsDepthwiseConvolutionSupportedRef(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional<TensorInfo>& biases, - std::string* reasonIfUnsupported = nullptr); - -bool IsDivisionSupportedRef(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsSubtractionSupportedRef(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsFullyConnectedSupportedRef(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& weights, - const TensorInfo& biases, - const FullyConnectedDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsInputSupportedRef(const TensorInfo& input, - std::string* reasonIfUnsupported = nullptr); - -bool IsL2NormalizationSupportedRef(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsLstmSupportedRef(const TensorInfo& input, const TensorInfo& outputStateIn, - const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, - const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, - const TensorInfo& output, const LstmDescriptor& descriptor, - const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, 
- const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights, - const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, - const TensorInfo& forgetGateBias, const TensorInfo& cellBias, - const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, - const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, - const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, - const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, - const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported = nullptr); - -bool IsMergerSupportedRef(const std::vector<const TensorInfo*> inputs, - const OriginsDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsMultiplicationSupportedRef(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsNormalizationSupportedRef(const TensorInfo& input, - const TensorInfo& output, - const NormalizationDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsOutputSupportedRef(const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsPermuteSupportedRef(const TensorInfo& input, - const TensorInfo& output, - const PermuteDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsPooling2dSupportedRef(const TensorInfo& input, - const TensorInfo& output, - const Pooling2dDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsResizeBilinearSupportedRef(const TensorInfo& input, - std::string* reasonIfUnsupported = nullptr); - -bool IsSoftmaxSupportedRef(const TensorInfo& input, - const TensorInfo& output, - const SoftmaxDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsSplitterSupportedRef(const TensorInfo& input, - const ViewsDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsFakeQuantizationSupportedRef(const TensorInfo& input, - const FakeQuantizationDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsReshapeSupportedRef(const TensorInfo& input, - std::string* reasonIfUnsupported = nullptr); - -bool IsFloorSupportedRef(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsConvertFp16ToFp32SupportedRef(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsConvertFp32ToFp16SupportedRef(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsMeanSupportedRef(const TensorInfo& input, - const TensorInfo& output, - const MeanDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -} diff --git a/src/armnn/backends/RefWorkloadFactory.cpp b/src/armnn/backends/RefWorkloadFactory.cpp deleted file mode 100644 index 93b4d946c4..0000000000 --- a/src/armnn/backends/RefWorkloadFactory.cpp +++ /dev/null @@ -1,249 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#include "CpuTensorHandle.hpp" -#include "RefWorkloadFactory.hpp" -#include "RefWorkloads.hpp" -#include "Layer.hpp" -#include "MemCopyWorkload.hpp" -#include "MakeWorkloadHelper.hpp" - -#include <boost/log/trivial.hpp> - -namespace armnn -{ - -template <typename F32Workload, typename U8Workload, typename QueueDescriptorType> -std::unique_ptr<IWorkload> RefWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor, - const WorkloadInfo& info) const -{ - return armnn::MakeWorkload<NullWorkload, F32Workload, U8Workload>(descriptor, info); -} - -RefWorkloadFactory::RefWorkloadFactory() -{ -} - -bool RefWorkloadFactory::IsLayerSupported(const Layer& layer, boost::optional<DataType> dataType, - std::string& outReasonIfUnsupported) -{ - return IWorkloadFactory::IsLayerSupported(Compute::CpuRef, layer, dataType, outReasonIfUnsupported); -} - -std::unique_ptr<ITensorHandle> RefWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const -{ - return std::make_unique<ScopedCpuTensorHandle>(tensorInfo); -} - -std::unique_ptr<IWorkload> RefWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - if (info.m_InputTensorInfos.empty() ) - { - throw InvalidArgumentException("RefWorkloadFactory::CreateInput: Input cannot be zero length"); - } - if (info.m_OutputTensorInfos.empty()) - { - throw InvalidArgumentException("RefWorkloadFactory::CreateInput: Output cannot be zero length"); - } - - if (info.m_InputTensorInfos[0].GetNumBytes() != info.m_OutputTensorInfos[0].GetNumBytes()) - { - throw InvalidArgumentException("RefWorkloadFactory::CreateInput: data input and output differ in byte count."); - } - - return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> RefWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - if (info.m_InputTensorInfos.empty() ) - { - throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: Input cannot be zero length"); - } - if (info.m_OutputTensorInfos.empty()) - { - throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: Output cannot be zero length"); - } - if (info.m_InputTensorInfos[0].GetNumBytes() != info.m_OutputTensorInfos[0].GetNumBytes()) - { - throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: data input and output differ in byte count."); - } - - return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> RefWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<RefActivationFloat32Workload, RefActivationUint8Workload>(descriptor, info); -} - -std::unique_ptr<IWorkload> RefWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<RefSoftmaxFloat32Workload, RefSoftmaxUint8Workload>(descriptor, info); -} - -std::unique_ptr<IWorkload> RefWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<RefSplitterFloat32Workload, RefSplitterUint8Workload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<RefMergerFloat32Workload, RefMergerUint8Workload>(descriptor, info); -} - 
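(The Create* factory methods in this file all funnel through the MakeWorkload helper declared near the top, which forwards to armnn::MakeWorkload from MakeWorkloadHelper.hpp; that header is not part of this diff. The sketch below is only a rough illustration of the data-type dispatch such a helper performs, under the assumption that it keys off the first tensor described by the WorkloadInfo; the name MakeWorkloadSketch and the exact fallback behaviour are hypothetical, not the real implementation.)

#include <memory>

// Hypothetical sketch only; assumes the armnn types used throughout this file
// (IWorkload, WorkloadInfo, DataType) are available via the usual includes.
template <typename Fp16Workload, typename F32Workload, typename U8Workload, typename QueueDescriptorType>
std::unique_ptr<armnn::IWorkload> MakeWorkloadSketch(const QueueDescriptorType& descriptor,
                                                     const armnn::WorkloadInfo& info)
{
    // Key the dispatch off the first input's data type, falling back to the first output.
    const armnn::DataType dataType = !info.m_InputTensorInfos.empty()
        ? info.m_InputTensorInfos[0].GetDataType()
        : info.m_OutputTensorInfos[0].GetDataType();

    switch (dataType)
    {
        case armnn::DataType::Float16:
            return std::make_unique<Fp16Workload>(descriptor, info);
        case armnn::DataType::Float32:
            return std::make_unique<F32Workload>(descriptor, info);
        case armnn::DataType::QuantisedAsymm8:
            return std::make_unique<U8Workload>(descriptor, info);
        default:
            return nullptr; // Hypothetical fallback; the real helper handles this differently.
    }
}

(In the reference factory above, NullWorkload is passed wherever a data type is not implemented, for example as the uint8 workload in CreateNormalization, so those combinations are effectively rejected at workload-creation time.)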
-std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateFullyConnected( - const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<RefFullyConnectedFloat32Workload, RefFullyConnectedUint8Workload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<RefPermuteFloat32Workload, RefPermuteUint8Workload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<RefPooling2dFloat32Workload, RefPooling2dUint8Workload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateConvolution2d( - const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<RefConvolution2dFloat32Workload, RefConvolution2dUint8Workload>(descriptor, info); -} - -std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDepthwiseConvolution2d( - const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<RefDepthwiseConvolution2dFloat32Workload, - RefDepthwiseConvolution2dUint8Workload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateNormalization( - const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<RefNormalizationFloat32Workload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<RefAdditionFloat32Workload, RefAdditionUint8Workload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateMultiplication( - const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<RefMultiplicationFloat32Workload, RefMultiplicationUint8Workload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateBatchNormalization( - const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<RefBatchNormalizationFloat32Workload, RefBatchNormalizationUint8Workload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - if (descriptor.m_Inputs.empty()) - { - throw InvalidArgumentException("RefWorkloadFactory: CreateMemCopy() expected an input tensor."); - } - return std::make_unique<CopyMemGenericWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> RefWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<RefResizeBilinearFloat32Workload, RefResizeBilinearUint8Workload>(descriptor, info); -} - -std::unique_ptr<IWorkload> RefWorkloadFactory::CreateFakeQuantization( - const FakeQuantizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<RefFakeQuantizationFloat32Workload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> RefWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<RefL2NormalizationFloat32Workload, NullWorkload>(descriptor, info); 
-} - -std::unique_ptr<IWorkload> RefWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<RefConstantFloat32Workload, RefConstantUint8Workload>(descriptor, info); -} - -std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<RefReshapeFloat32Workload, RefReshapeUint8Workload>(descriptor, info); -} - -std::unique_ptr<IWorkload> RefWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<RefFloorFloat32Workload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> RefWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<RefLstmFloat32Workload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> RefWorkloadFactory::CreateConvertFp16ToFp32( - const ConvertFp16ToFp32QueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return std::make_unique<RefConvertFp16ToFp32Workload>(descriptor, info); -} - -std::unique_ptr<IWorkload> RefWorkloadFactory::CreateConvertFp32ToFp16( - const ConvertFp32ToFp16QueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return std::make_unique<RefConvertFp32ToFp16Workload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateDivision( - const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<RefDivisionFloat32Workload, RefDivisionUint8Workload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateSubtraction( - const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<RefSubtractionFloat32Workload, RefSubtractionUint8Workload>(descriptor, info); -} - -std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateMean( - const MeanQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> RefWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); -} - - -} // namespace armnn diff --git a/src/armnn/backends/RefWorkloadFactory.hpp b/src/armnn/backends/RefWorkloadFactory.hpp deleted file mode 100644 index 6b13377167..0000000000 --- a/src/armnn/backends/RefWorkloadFactory.hpp +++ /dev/null @@ -1,145 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include "WorkloadFactory.hpp" -#include "OutputHandler.hpp" - -#include <boost/core/ignore_unused.hpp> -#include <boost/optional.hpp> - -namespace armnn -{ - -template <typename QueueDescriptorType> -constexpr bool IsOperationQueueDescriptor(const QueueDescriptorType&) { return true; } - -template <> -constexpr bool IsOperationQueueDescriptor(const MemCopyQueueDescriptor&) { return false; } - -template <> -constexpr bool IsOperationQueueDescriptor(const ConstantQueueDescriptor&) { return false; } - -template <> -constexpr bool IsOperationQueueDescriptor(const PermuteQueueDescriptor&) { return false; } - -// Reference workload factory. 
-class RefWorkloadFactory : public IWorkloadFactory -{ -public: - explicit RefWorkloadFactory(); - virtual ~RefWorkloadFactory() {} - - virtual Compute GetCompute() const override { return Compute::CpuRef; } - - static bool IsLayerSupported(const Layer& layer, boost::optional<DataType> dataType, - std::string& outReasonIfUnsupported); - - virtual bool SupportsSubTensors() const override { return false; } - - virtual std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent, - TensorShape const& subTensorShape, - unsigned int const* subTensorOrigin) const override - { - boost::ignore_unused(parent, subTensorShape, subTensorOrigin); - return nullptr; - } - - virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override; - - virtual std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateOutput(const OutputQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateActivation(const ActivationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateSplitter(const SplitterQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateMerger(const MergerQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreatePermute(const PermuteQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateDepthwiseConvolution2d( - const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateNormalization(const NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateAddition(const AdditionQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, - const 
WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateConstant(const ConstantQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateLstm(const LstmQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateDivision(const DivisionQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateSubtraction(const SubtractionQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr<IWorkload> CreateMean(const MeanQueueDescriptor& descriptor, - const WorkloadInfo& Info) const override; - - virtual std::unique_ptr<IWorkload> CreatePad(const PadQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; -private: - - template <typename F32Workload, typename U8Workload, typename QueueDescriptorType> - std::unique_ptr<IWorkload> MakeWorkload(const QueueDescriptorType& descriptor, const WorkloadInfo& info) const; - -}; - -} // namespace armnn diff --git a/src/armnn/backends/RefWorkloads.hpp b/src/armnn/backends/RefWorkloads.hpp deleted file mode 100644 index 746a59e71f..0000000000 --- a/src/armnn/backends/RefWorkloads.hpp +++ /dev/null @@ -1,53 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/RefWorkloads/RefConstantUint8Workload.hpp" -#include "backends/RefWorkloads/ArithmeticFunction.hpp" -#include "backends/RefWorkloads/RefArithmeticWorkload.hpp" -#include "backends/RefWorkloads/ConvImpl.hpp" -#include "backends/RefWorkloads/RefBaseConstantWorkload.hpp" -#include "backends/RefWorkloads/RefConvolution2dUint8Workload.hpp" -#include "backends/RefWorkloads/RefSplitterUint8Workload.hpp" -#include "backends/RefWorkloads/RefResizeBilinearUint8Workload.hpp" -#include "backends/RefWorkloads/RefL2NormalizationFloat32Workload.hpp" -#include "backends/RefWorkloads/RefActivationUint8Workload.hpp" -#include "backends/RefWorkloads/RefPooling2dFloat32Workload.hpp" -#include "backends/RefWorkloads/RefWorkloadUtils.hpp" -#include "backends/RefWorkloads/RefMergerUint8Workload.hpp" -#include "backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp" -#include "backends/RefWorkloads/Softmax.hpp" -#include "backends/RefWorkloads/RefMergerFloat32Workload.hpp" -#include "backends/RefWorkloads/TensorBufferArrayView.hpp" -#include "backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp" -#include "backends/RefWorkloads/Splitter.hpp" -#include "backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp" -#include "backends/RefWorkloads/RefReshapeFloat32Workload.hpp" -#include "backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp" -#include "backends/RefWorkloads/FullyConnected.hpp" -#include "backends/RefWorkloads/RefFloorFloat32Workload.hpp" -#include "backends/RefWorkloads/RefSoftmaxFloat32Workload.hpp" -#include "backends/RefWorkloads/RefSoftmaxUint8Workload.hpp" -#include "backends/RefWorkloads/RefReshapeUint8Workload.hpp" -#include "backends/RefWorkloads/RefResizeBilinearFloat32Workload.hpp" -#include "backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp" -#include "backends/RefWorkloads/ResizeBilinear.hpp" -#include "backends/RefWorkloads/RefNormalizationFloat32Workload.hpp" -#include "backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp" -#include "backends/RefWorkloads/RefPooling2dUint8Workload.hpp" -#include "backends/RefWorkloads/BatchNormImpl.hpp" -#include "backends/RefWorkloads/Activation.hpp" -#include "backends/RefWorkloads/Merger.hpp" -#include "backends/RefWorkloads/RefSplitterFloat32Workload.hpp" -#include "backends/RefWorkloads/RefConstantFloat32Workload.hpp" -#include "backends/RefWorkloads/RefActivationFloat32Workload.hpp" -#include "backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp" -#include "backends/RefWorkloads/Pooling2d.hpp" -#include "backends/RefWorkloads/RefFakeQuantizationFloat32Workload.hpp" -#include "backends/RefWorkloads/RefPermuteWorkload.hpp" -#include "backends/RefWorkloads/RefLstmFloat32Workload.hpp" -#include "backends/RefWorkloads/RefConvertFp16ToFp32Workload.hpp" -#include "backends/RefWorkloads/RefConvertFp32ToFp16Workload.hpp" diff --git a/src/armnn/backends/RefWorkloads/Activation.cpp b/src/armnn/backends/RefWorkloads/Activation.cpp deleted file mode 100644 index ef4903074b..0000000000 --- a/src/armnn/backends/RefWorkloads/Activation.cpp +++ /dev/null @@ -1,91 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "Activation.hpp" - -#include <boost/log/trivial.hpp> - -#include <cmath> - -namespace armnn -{ - -void Activation(const float* in, - float* out, - const TensorInfo& tensorInfo, - ActivationFunction function, - float a, - float b) -{ - for (size_t i = 0; i<tensorInfo.GetNumElements(); i++) - { - float input = in[i]; - float output; - - // Compute the result of the activation function. - switch (function) - { - case ActivationFunction::Linear: - { - output = a * input + b; - break; - } - case ActivationFunction::Sigmoid: - { - output = 1.f / (1.f + expf(-input)); - break; - } - case ActivationFunction::ReLu: - { - output = std::max(0.f, input); - break; - } - case ActivationFunction::BoundedReLu: - { - output = std::min(a, std::max(b, input)); - break; - } - case ActivationFunction::SoftReLu: - { - output = logf(1.0f + expf(input)); - break; - } - case ActivationFunction::LeakyReLu: - { - output = input > 0.0f ? input : (input * a); - break; - } - case ActivationFunction::Abs: - { - output = input < 0 ? -input : input; - break; - } - case ActivationFunction::Sqrt: - { - output = sqrtf(input); - break; - } - case ActivationFunction::Square: - { - output = input * input; - break; - } - case ActivationFunction::TanH: - { - output = a * tanhf(b * input); - break; - } - default: - { - BOOST_LOG_TRIVIAL(error) << "Unsupported activation function"; - return; - } - } - - out[i] = output; - } -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Activation.hpp b/src/armnn/backends/RefWorkloads/Activation.hpp deleted file mode 100644 index c8a23114f0..0000000000 --- a/src/armnn/backends/RefWorkloads/Activation.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include <armnn/Tensor.hpp> -#include <armnn/Types.hpp> - -namespace armnn -{ - -/// Performs the ActivationFunction elementwise on the inputs to give the outputs. -void Activation(const float* in, - float* out, - const TensorInfo& tensorInfo, - ActivationFunction function, - float a, - float b); - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/ArithmeticFunction.cpp b/src/armnn/backends/RefWorkloads/ArithmeticFunction.cpp deleted file mode 100644 index fede138253..0000000000 --- a/src/armnn/backends/RefWorkloads/ArithmeticFunction.cpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ArithmeticFunction.hpp" -#include "Broadcast.hpp" -#include <functional> - -namespace armnn -{ - -template <typename Functor> -ArithmeticFunction<Functor>::ArithmeticFunction(const TensorShape& inShape0, - const TensorShape& inShape1, - const TensorShape& outShape, - const float* inData0, - const float* inData1, - float* outData) -{ - BroadcastLoop(inShape0, inShape1, outShape).Unroll(Functor(), 0, inData0, inData1, outData); -} - -} //namespace armnn - -template struct armnn::ArithmeticFunction<std::plus<float>>; -template struct armnn::ArithmeticFunction<std::minus<float>>; -template struct armnn::ArithmeticFunction<std::multiplies<float>>; -template struct armnn::ArithmeticFunction<std::divides<float>>; diff --git a/src/armnn/backends/RefWorkloads/ArithmeticFunction.hpp b/src/armnn/backends/RefWorkloads/ArithmeticFunction.hpp deleted file mode 100644 index eafb6444f6..0000000000 --- a/src/armnn/backends/RefWorkloads/ArithmeticFunction.hpp +++ /dev/null @@ -1,24 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. 
All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <armnn/Tensor.hpp> - -namespace armnn -{ - -template <typename Functor> -struct ArithmeticFunction -{ - ArithmeticFunction(const TensorShape& inShape0, - const TensorShape& inShape1, - const TensorShape& outShape, - const float* inData0, - const float* inData1, - float* outData); -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/BatchNormImpl.hpp b/src/armnn/backends/RefWorkloads/BatchNormImpl.hpp deleted file mode 100644 index a7579c8373..0000000000 --- a/src/armnn/backends/RefWorkloads/BatchNormImpl.hpp +++ /dev/null @@ -1,56 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "RefWorkloadUtils.hpp" - -#include <armnn/Tensor.hpp> - -#include <cmath> - -namespace armnn -{ - -template<typename NormData> -static void BatchNormImpl(NormData data, - const float* varIn, - const float* meanIn, - const float* gammaIn, - const float* betaIn, - float * outputData, - const float * inputData) -{ - const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]); - for (unsigned int c = 0; c < inputInfo0.GetShape()[1]; c++) - { - float var = varIn[c]; - float mean = meanIn[c]; - float gamma = gammaIn[c]; - float beta = betaIn[c]; - - float mult = gamma / sqrtf(var + data.m_Parameters.m_Eps); - float add = beta - mult * mean; - - for (unsigned int n = 0; n < inputInfo0.GetShape()[0]; n++) - { - for (unsigned int j = 0; j < inputInfo0.GetShape()[2]; j++) - { - for (unsigned int i = 0; i < inputInfo0.GetShape()[3]; i++) - { - unsigned int index = i + - j*inputInfo0.GetShape()[3] + - c*inputInfo0.GetShape()[3] * inputInfo0.GetShape()[2] + - n*inputInfo0.GetShape()[3] * inputInfo0.GetShape()[2] - * inputInfo0.GetShape()[1]; - - outputData[index] = mult * inputData[index] + add; - } - } - } - } -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Broadcast.cpp b/src/armnn/backends/RefWorkloads/Broadcast.cpp deleted file mode 100644 index 8421a0a7ed..0000000000 --- a/src/armnn/backends/RefWorkloads/Broadcast.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "Broadcast.hpp" - -namespace armnn -{ - -BroadcastLoop::BroadcastLoop(const TensorShape& inShape0, const TensorShape& inShape1, const TensorShape& outShape) -: m_DimData(outShape.GetNumDimensions()) -{ - const unsigned int numDims = GetNumDimensions(); - - unsigned int sIn0 = 1; - unsigned int sIn1 = 1; - unsigned int sOut = 1; - - for (unsigned int j = numDims - 1, k = 0; k < numDims ; k++, j--) - { - m_DimData[j].m_DimSize = outShape[j]; - m_DimData[j].m_Stride1 = (inShape0[j] > 1) ? sIn0 : 0; - m_DimData[j].m_Stride2 = (inShape1[j] > 1) ? sIn1 : 0; - m_DimData[j].m_StrideOut = sOut; - - sIn0 *= inShape0[j]; - sIn1 *= inShape1[j]; - sOut *= outShape[j]; - } -} - -} // namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Broadcast.hpp b/src/armnn/backends/RefWorkloads/Broadcast.hpp deleted file mode 100644 index e92ed0598d..0000000000 --- a/src/armnn/backends/RefWorkloads/Broadcast.hpp +++ /dev/null @@ -1,58 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include <armnn/Tensor.hpp> - -#include <functional> - -namespace armnn -{ - -struct BroadcastLoop -{ - BroadcastLoop(const TensorShape& inShape0, const TensorShape& inShape1, const TensorShape& outShape); - - unsigned int GetNumDimensions() - { - return static_cast<unsigned int>(m_DimData.size()); - } - - template <typename T0, typename T1, typename U, typename Func> - void Unroll(Func operationFunc, - unsigned int dimension, - const T0* inData0, - const T1* inData1, - U* outData) - { - if (dimension >= GetNumDimensions()) - { - *outData = operationFunc(*inData0, *inData1); - return; - } - - for (unsigned int i = 0; i < m_DimData[dimension].m_DimSize; i++) - { - Unroll(operationFunc, dimension + 1, inData0, inData1, outData); - - inData0 += m_DimData[dimension].m_Stride1; - inData1 += m_DimData[dimension].m_Stride2; - outData += m_DimData[dimension].m_StrideOut; - } - } - -private: - // Struct to hold the dimension data. - struct BroadcastDimensionData - { - unsigned int m_DimSize; - unsigned int m_StrideOut; - unsigned int m_Stride1; - unsigned int m_Stride2; - }; - - std::vector<BroadcastDimensionData> m_DimData; -}; - -} //namespace armnn
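BroadcastLoop above implements NumPy-style broadcasting by giving any dimension of size 1 a stride of 0, so the same element is re-read while the other operand advances. A minimal usage sketch (hypothetical data; both input shapes must have the same rank as the output, as the constructor assumes):

#include "Broadcast.hpp"
#include <functional>

void BroadcastAddSketch()
{
    // {2,1} + {1,3} -> {2,3}
    const armnn::TensorShape in0({ 2, 1 });
    const armnn::TensorShape in1({ 1, 3 });
    const armnn::TensorShape out({ 2, 3 });

    const float a[] = { 1.f, 2.f };          // data for the {2,1} operand
    const float b[] = { 10.f, 20.f, 30.f };  // data for the {1,3} operand
    float result[6] = {};                    // row-major {2,3} output

    armnn::BroadcastLoop(in0, in1, out).Unroll(std::plus<float>(), 0, a, b, result);
    // result == { 11, 21, 31, 12, 22, 32 }
}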
\ No newline at end of file diff --git a/src/armnn/backends/RefWorkloads/ConvImpl.cpp b/src/armnn/backends/RefWorkloads/ConvImpl.cpp deleted file mode 100644 index 8743a2bd0d..0000000000 --- a/src/armnn/backends/RefWorkloads/ConvImpl.cpp +++ /dev/null @@ -1,71 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ConvImpl.hpp" - -#include <boost/assert.hpp> - -#include <cmath> -#include <limits> - -namespace armnn -{ - -QuantizedMultiplierSmallerThanOne::QuantizedMultiplierSmallerThanOne(float multiplier) -{ - BOOST_ASSERT(multiplier >= 0.0f && multiplier < 1.0f); - if (multiplier == 0.0f) - { - m_Multiplier = 0; - m_RightShift = 0; - } - else - { - const double q = std::frexp(multiplier, &m_RightShift); - m_RightShift = -m_RightShift; - int64_t qFixed = static_cast<int64_t>(std::round(q * (1ll << 31))); - BOOST_ASSERT(qFixed <= (1ll << 31)); - if (qFixed == (1ll << 31)) - { - qFixed /= 2; - --m_RightShift; - } - BOOST_ASSERT(m_RightShift >= 0); - BOOST_ASSERT(qFixed <= std::numeric_limits<int32_t>::max()); - m_Multiplier = static_cast<int32_t>(qFixed); - } -} - -int32_t QuantizedMultiplierSmallerThanOne::operator*(int32_t rhs) const -{ - int32_t x = SaturatingRoundingDoublingHighMul(rhs, m_Multiplier); - return RoundingDivideByPOT(x, m_RightShift); -} - -int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b) -{ - // Check for overflow. - if (a == b && a == std::numeric_limits<int32_t>::min()) - { - return std::numeric_limits<int32_t>::max(); - } - int64_t a_64(a); - int64_t b_64(b); - int64_t ab_64 = a_64 * b_64; - int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30)); - int32_t ab_x2_high32 = static_cast<std::int32_t>((ab_64 + nudge) / (1ll << 31)); - return ab_x2_high32; -} - -int32_t QuantizedMultiplierSmallerThanOne::RoundingDivideByPOT(int32_t x, int exponent) -{ - BOOST_ASSERT(exponent >= 0 && exponent <= 31); - int32_t mask = (1 << exponent) - 1; - int32_t remainder = x & mask; - int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0); - return (x >> exponent) + (remainder > threshold ? 1 : 0); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/ConvImpl.hpp b/src/armnn/backends/RefWorkloads/ConvImpl.hpp deleted file mode 100644 index 4c9ab2a644..0000000000 --- a/src/armnn/backends/RefWorkloads/ConvImpl.hpp +++ /dev/null @@ -1,187 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "RefWorkloadUtils.hpp" - -#include <armnn/Tensor.hpp> - -#include <boost/assert.hpp> -#include <boost/numeric/conversion/cast.hpp> - -#include <cmath> -#include <limits> - -namespace armnn -{ - -/// Performs multiplication of an integer with a multiplier which is less than one, -/// using quantized integer arithmetic which is consistent with AndroidNN's CPU executor. -struct QuantizedMultiplierSmallerThanOne -{ -public: - /// Constructs a QuantizedMultiplierSmallerThanOne which will multiply by the given multiplier. - /// This stores the appropriate integer quantities (derived from the given multiplier) for later use. - /// The implementation of this function is adapted from Android NN's QuantizeMultiplierSmallerThanOne(). - QuantizedMultiplierSmallerThanOne(float multiplier); - - /// The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne(). 
- int32_t operator*(int32_t rhs) const; - -private: - /// The implementation of this function is adapted from gemmlowp's SaturatingRoundingDoublingHighMul(). - static int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b); - - /// The implementation of this function is adapted from gemmlowp's RoundingDivideByPOT(). - static int32_t RoundingDivideByPOT(int32_t x, int exponent); - - int32_t m_Multiplier; - int32_t m_RightShift; -}; - -/// An implementation shared by normal and depthwise convolution. -template<typename ConvData, typename InputType, typename BiasType, typename AccumulatorType> -static void ConvImpl(ConvData data, - const InputType* inputData, - float inputScale, - int32_t inputOffset, - const InputType* filterData, - float filterScale, - int32_t filterOffset, - const BiasType* biasData, - InputType* outputData, - float outputScale, - int32_t outputOffset, - const TensorInfo& filterInfo, - bool depthwise = false) -{ - if (data.m_Parameters.m_BiasEnabled && !biasData) - { - throw InvalidArgumentException("Bias is enabled but the bias data is invalid"); - } - - const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]); - const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]); - - unsigned int depthMult = depthwise ? filterInfo.GetShape()[0] : 1; - unsigned int channelsInput = filterInfo.GetShape()[1]; - unsigned int channelsOutput = depthwise ? channelsInput * depthMult : filterInfo.GetShape()[0]; - - unsigned int batchSize = outputInfo0.GetShape()[0]; - unsigned int heightOutput = outputInfo0.GetShape()[2]; - unsigned int widthOutput = outputInfo0.GetShape()[3]; - unsigned int heightInput = inputInfo0.GetShape()[2]; - unsigned int widthInput = inputInfo0.GetShape()[3]; - - unsigned int heightFilter = filterInfo.GetShape()[2]; - unsigned int widthFilter = filterInfo.GetShape()[3]; - - unsigned int paddingTop = data.m_Parameters.m_PadTop; - unsigned int paddingLeft = data.m_Parameters.m_PadLeft; - unsigned int hStride = data.m_Parameters.m_StrideY; - unsigned int xStride = data.m_Parameters.m_StrideX; - - // The world's least efficient convolution. - for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++) - { - for (unsigned int cOutput = 0; cOutput < channelsOutput; cOutput++) - { - for (unsigned int yOutput = 0; yOutput < heightOutput; yOutput++) - { - for (unsigned int xOutput = 0; xOutput < widthOutput; xOutput++) - { - // This loop goes over each output element. - AccumulatorType sum = AccumulatorType(); - - // For depthwise, each output channel corresponds to exactly one input channel. - // For normal, must loop over each input channel. - for (unsigned int cInput = 0; cInput < (depthwise ? 1 : channelsInput); cInput++) - { - unsigned int depthwiseMultiplierIdx = 0; - if (depthwise) - { - cInput = cOutput / depthMult; - depthwiseMultiplierIdx = cOutput % depthMult; - } - - for (unsigned int yFilter = 0; yFilter < heightFilter; yFilter++) - { - for (unsigned int xFilter = 0; xFilter < widthFilter; xFilter++) - { - // This loop goes over each input element for each output element. - - unsigned int filterIndex; - - // Since dimensionality of kernel depends on depthwiseness, so does index. 
- if (depthwise) - { - filterIndex = depthwiseMultiplierIdx * widthFilter * heightFilter * channelsInput + - cInput * widthFilter * heightFilter + - yFilter * widthFilter + - xFilter; - } - else - { - filterIndex = cOutput * widthFilter * heightFilter * channelsInput + - cInput * widthFilter * heightFilter + - yFilter * widthFilter + - xFilter; - } - AccumulatorType filterValue = filterData[filterIndex] - - boost::numeric_cast<AccumulatorType>(filterOffset); - - unsigned int yInput = yOutput * hStride + yFilter; - unsigned int xInput = xOutput * xStride + xFilter; - - AccumulatorType inputValue; - - // Check if we're in the padding. - if (yInput < paddingTop || yInput >= heightInput + paddingTop || - xInput < paddingLeft || xInput >= widthInput + paddingLeft ) - { - inputValue = AccumulatorType(); - } - else - { - inputValue = inputData[batchIdx * widthInput * heightInput * channelsInput + - widthInput * heightInput * cInput + - widthInput * (yInput - paddingTop) + - xInput - paddingLeft] - - boost::numeric_cast<AccumulatorType>(inputOffset); - } - sum += filterValue * inputValue; - } - } - } - - if (data.m_Parameters.m_BiasEnabled) - { - sum += biasData[cOutput]; - } - - if (outputScale != 0.0f) - { - float multiplier = (inputScale * filterScale) / outputScale; - // Apply the multiplier to sum, but do so using some quantized arithmetic which is consistent - // with the AndroidNN CPU implementation. This should be (roughly) equivalent to: - // sum = std::round(multiplier * sum + outputOffset); - sum = boost::numeric_cast<AccumulatorType>( - QuantizedMultiplierSmallerThanOne(multiplier) * boost::numeric_cast<int32_t>(sum)) - + boost::numeric_cast<AccumulatorType>(outputOffset); - sum = std::min<AccumulatorType>(std::max<AccumulatorType>(sum, 0), 255); - } - - outputData[batchIdx * widthOutput * heightOutput * channelsOutput + - widthOutput * heightOutput * cOutput + - widthOutput * yOutput + - xOutput] = boost::numeric_cast<InputType>(sum); - } - } - } - } -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/FullyConnected.cpp b/src/armnn/backends/RefWorkloads/FullyConnected.cpp deleted file mode 100644 index bf5814d2ad..0000000000 --- a/src/armnn/backends/RefWorkloads/FullyConnected.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "FullyConnected.hpp" - -#include <boost/assert.hpp> - -namespace armnn -{ - -void FullyConnected(const float* inputData, - float* outputData, - const TensorInfo& inputTensorInfo, - const TensorInfo& outputTensorInfo, - const float* weightData, - const float* biasData, - bool transposeWeights) -{ - unsigned int N = outputTensorInfo.GetShape()[1]; // Outputs Vector Size. - - BOOST_ASSERT(inputTensorInfo.GetNumDimensions() > 1); // Needs some data. - - unsigned int K = 1; // Total number of activations in the input. 
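The quantized path of ConvImpl above scales the integer accumulator by multiplier = (inputScale * filterScale) / outputScale using 32-bit fixed-point arithmetic so that it matches the AndroidNN CPU executor. A plain floating-point sketch of what that requantization approximates (hypothetical scales and offsets, ignoring the exact rounding of the fixed-point path):

#include <algorithm>
#include <cmath>
#include <cstdint>

void RequantizeSketch()
{
    const float inputScale = 0.5f, filterScale = 0.25f, outputScale = 1.0f; // hypothetical scales
    const int32_t outputOffset = 3;                                         // hypothetical zero point
    const int32_t sum = 40;                                                 // accumulator value

    const float multiplier = (inputScale * filterScale) / outputScale;      // 0.125
    int32_t requantized = static_cast<int32_t>(std::round(multiplier * sum)) + outputOffset; // 5 + 3 = 8
    requantized = std::min(std::max(requantized, 0), 255);                  // clamp to the uint8 range
    (void)requantized;
}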
- for (unsigned int i = 1; i < inputTensorInfo.GetNumDimensions(); i++) - { - K *= inputTensorInfo.GetShape()[i]; - } - - for (unsigned int n = 0; n < inputTensorInfo.GetShape()[0]; n++) - { - for (unsigned int channelOutput = 0; channelOutput < N; channelOutput++) - { - float outval = 0.f; - - for (unsigned int channelInput = 0; channelInput < K; channelInput++) - { - float weight; - if (transposeWeights) - { - weight = weightData[channelOutput * K + channelInput]; - } - else - { - weight = weightData[channelInput * N + channelOutput]; - } - - outval += weight * inputData[n * K + channelInput]; - } - - if (biasData) - { - outval += biasData[channelOutput]; - } - - outputData[n * N + channelOutput] = outval; - } - } -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/FullyConnected.hpp b/src/armnn/backends/RefWorkloads/FullyConnected.hpp deleted file mode 100644 index 623259f8f8..0000000000 --- a/src/armnn/backends/RefWorkloads/FullyConnected.hpp +++ /dev/null @@ -1,22 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <armnn/Tensor.hpp> - -namespace armnn -{ - -/// Performs a matrix multiplication and optionally adds a bias. -void FullyConnected(const float* inputData, - float* outputData, - const TensorInfo& inputTensorInfo, - const TensorInfo& outputTensorInfo, - const float* weightData, - const float* biasData, - bool transposeWeights); - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Merger.hpp b/src/armnn/backends/RefWorkloads/Merger.hpp deleted file mode 100644 index 867925faa2..0000000000 --- a/src/armnn/backends/RefWorkloads/Merger.hpp +++ /dev/null @@ -1,82 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "RefWorkloadUtils.hpp" - -#include "backends/WorkloadData.hpp" - -#include <armnn/Tensor.hpp> - -namespace armnn -{ - -template <typename DataType> -void Merger(const MergerQueueDescriptor& data) -{ - const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]); - - for (unsigned int index = 0 ; index < outputInfo0.GetNumElements(); ++index) - { - unsigned int indices[MaxNumOfTensorDimensions] = { 0 }; - - unsigned int indexRemainder = index; - unsigned int dimensionStride = outputInfo0.GetNumElements(); - - for (unsigned int i=0; i<outputInfo0.GetNumDimensions(); i++) - { - dimensionStride /= outputInfo0.GetShape()[i]; - indices[i] = indexRemainder / dimensionStride; // Use integer division to round down. - indexRemainder -= indices[i] * dimensionStride; - } - - for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx) - { - MergerQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx]; - - //Split view extents are defined by the size of (the corresponding) input tensor. - const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[viewIdx]); - BOOST_ASSERT(inputInfo.GetNumDimensions() == outputInfo0.GetNumDimensions()); - - // Check all dimensions to see if this element is inside the given input view. 
- bool insideView = true; - for (unsigned int i=0; i<inputInfo.GetNumDimensions(); i++) - { - if (indices[i] < view.m_Origin[i]) - { - insideView = false; - } - if (indices[i] >= view.m_Origin[i] + inputInfo.GetShape()[i]) - { - insideView = false; - } - } - - if (insideView) - { - unsigned int inIndex = 0; - unsigned int dimensionStride = 1; - - for (unsigned int i = inputInfo.GetNumDimensions(); i-- > 0;) - { - inIndex += dimensionStride * (indices[i] - view.m_Origin[i]); - dimensionStride *= inputInfo.GetShape()[i]; - } - - //We are within the view, copy input data to the output corresponding to this view. - (GetOutputTensorData<DataType>(0, data))[index] = - (GetInputTensorData<DataType>(viewIdx, data))[inIndex]; - - //What should we do if input views overlap on the output tensor? - //We could error, take the average, or shm else... - //For now just stop after finding first view (input) that matches. - break; - } - } - } -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Pooling2d.cpp b/src/armnn/backends/RefWorkloads/Pooling2d.cpp deleted file mode 100644 index 5812a290e7..0000000000 --- a/src/armnn/backends/RefWorkloads/Pooling2d.cpp +++ /dev/null @@ -1,241 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "Pooling2d.hpp" - -#include <armnn/Exceptions.hpp> -#include <armnn/Types.hpp> - -#include <boost/numeric/conversion/cast.hpp> - -#include <limits> -#include <algorithm> -#include <functional> - -namespace -{ - using PoolingAlgorithm = armnn::PoolingAlgorithm; - - float DefaultInitializer(PoolingAlgorithm algorithm) - { - switch (algorithm) - { - case PoolingAlgorithm::Max: - { - return std::numeric_limits<float>::lowest(); - } - case PoolingAlgorithm::Average: - case PoolingAlgorithm::L2: - { - return 0.0f; - } - default: - { - throw armnn::InvalidArgumentException("Unsupported pooling algorithm"); - } - } - } - - using Accumulator = std::function<void(float & accu, float value)>; - - Accumulator GetAccumulator(PoolingAlgorithm algorithm) - { - switch (algorithm) - { - case PoolingAlgorithm::Max: - { - return [](float & accu, float value) { - if (value > accu) { - accu = value; - } - }; - } - - case PoolingAlgorithm::Average: - { - return [](float & accu, float value) { - accu += value; - }; - } - - case PoolingAlgorithm::L2: - { - return [](float & accu, float value) { - accu += (value*value); - }; - } - - default: - { - throw armnn::InvalidArgumentException("Unsupported pooling algorithm"); - } - } - } - - using Executor = std::function<void(float & accumulated, float kernelSize)>; - - Executor GetExecutor(PoolingAlgorithm algorithm) - { - switch (algorithm) - { - case PoolingAlgorithm::Max: - { - return [](float & accumulated, float kernelSize) {}; - } - - case PoolingAlgorithm::Average: - { - return [](float & accumulated, float kernelSize) { - accumulated /= kernelSize; - }; - } - - case PoolingAlgorithm::L2: - { - return [](float & accumulated, float kernelSize) { - accumulated = sqrtf(accumulated / kernelSize); - }; - } - - default: - { - throw armnn::InvalidArgumentException("Unsupported pooling algorithm"); - } - } - } - - bool OnPaddingOnly(int start, int end, int maxRange, int padding) - { - if (end <= 0 || start > (maxRange - padding)) - { - return true; - } - else - { - return false; - } - } - - - bool ClampRange(int & start, int & end, int maxRange) - { - if (start < 0 || end > maxRange) - { - start = std::min(std::max(start, 0), maxRange); - end = std::min(std::max(end, 0), maxRange); 
- return true; - } - else - { - return false; - } - } -} - -namespace armnn -{ - -void Pooling2d(const float* in, - float* out, - const TensorInfo& inputInfo, - const TensorInfo& outputInfo, - const Pooling2dDescriptor& params) -{ - const int batchSize = boost::numeric_cast<int>(outputInfo.GetShape()[0]); - const int channels = boost::numeric_cast<int>(outputInfo.GetShape()[1]); - const int heightOutput = boost::numeric_cast<int>(outputInfo.GetShape()[2]); - const int widthOutput = boost::numeric_cast<int>(outputInfo.GetShape()[3]); - const int heightInput = boost::numeric_cast<int>(inputInfo.GetShape()[2]); - const int widthInput = boost::numeric_cast<int>(inputInfo.GetShape()[3]); - const int padLeft = boost::numeric_cast<int>(params.m_PadLeft); - const int padRight = boost::numeric_cast<int>(params.m_PadRight); - const int padTop = boost::numeric_cast<int>(params.m_PadTop); - const int padBottom = boost::numeric_cast<int>(params.m_PadBottom); - const int strideX = boost::numeric_cast<int>(params.m_StrideX); - const int strideY = boost::numeric_cast<int>(params.m_StrideY); - const int poolHeight = boost::numeric_cast<int>(params.m_PoolHeight); - const int poolWidth = boost::numeric_cast<int>(params.m_PoolWidth); - - float defaultInitializer = DefaultInitializer(params.m_PoolType); - - Accumulator accumulate = GetAccumulator(params.m_PoolType); - Executor execute = GetExecutor(params.m_PoolType); - - // Check supported padding methods outside the loop to simplify - // the inner loop. - if (params.m_PaddingMethod != PaddingMethod::Exclude && - params.m_PaddingMethod != PaddingMethod::IgnoreValue) - { - throw armnn::InvalidArgumentException("Unsupported padding type"); - } - - for (int n = 0; n < batchSize; n++) - { - for (int c = 0; c < channels; c++) - { - for (int yOutput = 0; yOutput < heightOutput; yOutput++) - { - for (int xOutput = 0; xOutput < widthOutput; xOutput++) - { - int hstart = (yOutput * strideY) - padTop; - int wstart = (xOutput * strideX) - padLeft; - int hend = hstart + poolHeight; - int wend = wstart + poolWidth; - - // Clamp the pooling region inside the valid input area (which includes the padding). - // This is necessary because the final pooling in a row may overlap beyond the padding. - hend = std::min(hend, heightInput + padBottom); - wend = std::min(wend, widthInput + padRight); - - float result = defaultInitializer; - float poolAreaSize = boost::numeric_cast<float>((hend - hstart) * (wend - wstart)); - - // Special case: when the pooling kernel is over a padding region and the padding - // size is larger or equal to the kernel and the kernel only covers - // padding and no real values, then we initialize the result as zero - // by convention. This is because we need to choose a value here and - // all values we have are padding, which we ignore. - if (OnPaddingOnly(hstart, hend, heightInput, padBottom) || - OnPaddingOnly(wstart, wend, widthInput, padRight)) - { - result = 0.0f; - } - - bool clamped = ClampRange(wstart, wend, widthInput); - clamped |= ClampRange(hstart, hend, heightInput); - - if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude) - { - // When we exclude the padding, it means we calculate with a smaller - // kernel size, so I changed the divisor here. 
- poolAreaSize = boost::numeric_cast<float>((hend - hstart) * (wend - wstart)); - } - - for (auto yInput = hstart; yInput < hend; yInput++) - { - for (auto xInput = wstart; xInput < wend; xInput++) - { - float inval = in[n * widthInput * heightInput * channels + - c * widthInput * heightInput + - yInput * widthInput + - xInput]; - - accumulate(result, inval); - } - } - - execute(result, poolAreaSize); - - out[n * widthOutput * heightOutput * channels + - c * widthOutput * heightOutput + - yOutput * widthOutput + - xOutput] = result; - } - } - } - } -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Pooling2d.hpp b/src/armnn/backends/RefWorkloads/Pooling2d.hpp deleted file mode 100644 index da56b25c4e..0000000000 --- a/src/armnn/backends/RefWorkloads/Pooling2d.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <armnn/Descriptors.hpp> -#include <armnn/Tensor.hpp> - -namespace armnn -{ - -/// Computes the Pooling2d operation. -void Pooling2d(const float* in, - float* out, - const TensorInfo& inputInfo, - const TensorInfo& outputInfo, - const Pooling2dDescriptor& params); - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefActivationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefActivationFloat32Workload.cpp deleted file mode 100644 index 3cc59be7a4..0000000000 --- a/src/armnn/backends/RefWorkloads/RefActivationFloat32Workload.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefActivationFloat32Workload.hpp" - -#include "Activation.hpp" -#include "RefWorkloadUtils.hpp" - -#include "Profiling.hpp" - -namespace armnn -{ - -void RefActivationFloat32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefActivationFloat32Workload_Execute"); - - Activation(GetInputTensorDataFloat(0, m_Data), - GetOutputTensorDataFloat(0, m_Data), - GetTensorInfo(m_Data.m_Inputs[0]), - m_Data.m_Parameters.m_Function, - m_Data.m_Parameters.m_A, - m_Data.m_Parameters.m_B); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefActivationFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefActivationFloat32Workload.hpp deleted file mode 100644 index 0de33f02ff..0000000000 --- a/src/armnn/backends/RefWorkloads/RefActivationFloat32Workload.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -namespace armnn -{ - -class RefActivationFloat32Workload : public Float32Workload<ActivationQueueDescriptor> -{ -public: - using Float32Workload<ActivationQueueDescriptor>::Float32Workload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefActivationUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefActivationUint8Workload.cpp deleted file mode 100644 index b95c2e22a8..0000000000 --- a/src/armnn/backends/RefWorkloads/RefActivationUint8Workload.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
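Pooling2d above clamps each pooling window to the real input and, when PaddingMethod::Exclude is selected, recomputes the divisor from the clamped window. As a worked example: with a 4x4 input, a 3x3 kernel and one pixel of padding on every side, the top-left window initially spans [-1, 2) in both directions; after clamping only a 2x2 patch of real input remains, so the average is divided by 4 instead of 9, whereas PaddingMethod::IgnoreValue keeps the divisor at 9 and effectively counts the padded positions as zeros.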
-// SPDX-License-Identifier: MIT -// - -#include "RefActivationUint8Workload.hpp" - -#include "Activation.hpp" -#include "RefWorkloadUtils.hpp" - -#include "Profiling.hpp" - -#include <vector> - -namespace armnn -{ - -void RefActivationUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefActivationUint8Workload_Execute"); - - const TensorInfo& tensorInfo = GetTensorInfo(m_Data.m_Inputs[0]); - - auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), tensorInfo); - - std::vector<float> results(tensorInfo.GetNumElements()); - - Activation(dequant.data(), - results.data(), - tensorInfo, - m_Data.m_Parameters.m_Function, - m_Data.m_Parameters.m_A, - m_Data.m_Parameters.m_B); - - Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), GetTensorInfo(m_Data.m_Outputs[0])); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefActivationUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefActivationUint8Workload.hpp deleted file mode 100644 index f38888a9f7..0000000000 --- a/src/armnn/backends/RefWorkloads/RefActivationUint8Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefActivationUint8Workload : public Uint8Workload<ActivationQueueDescriptor> -{ -public: - using Uint8Workload<ActivationQueueDescriptor>::Uint8Workload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefArithmeticWorkload.cpp b/src/armnn/backends/RefWorkloads/RefArithmeticWorkload.cpp deleted file mode 100644 index 6c39fa1186..0000000000 --- a/src/armnn/backends/RefWorkloads/RefArithmeticWorkload.cpp +++ /dev/null @@ -1,69 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
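The two activation workloads above are thin wrappers: the float version calls the free Activation() function directly on the tensor buffers, and the uint8 version does the same after dequantizing. A standalone sketch of such a direct call (hypothetical values; only the element count of the TensorInfo matters to Activation):

#include "Activation.hpp"
#include <armnn/Tensor.hpp>

void BoundedReLuSketch()
{
    float in[4]  = { -2.f, -0.5f, 0.5f, 2.f };
    float out[4] = {};

    const armnn::TensorInfo info(armnn::TensorShape({ 4 }), armnn::DataType::Float32);

    // BoundedReLu clamps each element to [b, a]; here out becomes { 0, 0, 0.5, 1 }.
    armnn::Activation(in, out, info, armnn::ActivationFunction::BoundedReLu, 1.0f, 0.0f);
}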
-// SPDX-License-Identifier: MIT -// - -#include "RefArithmeticWorkload.hpp" -#include "ArithmeticFunction.hpp" -#include "RefWorkloadUtils.hpp" -#include "Profiling.hpp" -#include <vector> - -namespace armnn -{ - -template <typename ParentDescriptor, typename Functor> -void BaseFloat32ArithmeticWorkload<ParentDescriptor, Functor>::ExecuteImpl(const char * debugString) const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, debugString); - - auto data = Float32Workload<ParentDescriptor>::GetData(); - const TensorShape& inShape0 = GetTensorInfo(data.m_Inputs[0]).GetShape(); - const TensorShape& inShape1 = GetTensorInfo(data.m_Inputs[1]).GetShape(); - const TensorShape& outShape = GetTensorInfo(data.m_Outputs[0]).GetShape(); - - const float* inData0 = GetInputTensorDataFloat(0, data); - const float* inData1 = GetInputTensorDataFloat(1, data); - float* outData = GetOutputTensorDataFloat(0, data); - - ArithmeticFunction<Functor>(inShape0, inShape1, outShape, inData0, inData1, outData); -} - -template <typename ParentDescriptor, typename Functor> -void BaseUint8ArithmeticWorkload<ParentDescriptor, Functor>::ExecuteImpl(const char * debugString) const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, debugString); - - auto data = Uint8Workload<ParentDescriptor>::GetData(); - const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]); - const TensorInfo& inputInfo1 = GetTensorInfo(data.m_Inputs[1]); - const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]); - - auto dequant0 = Dequantize(GetInputTensorDataU8(0, data), inputInfo0); - auto dequant1 = Dequantize(GetInputTensorDataU8(1, data), inputInfo1); - - std::vector<float> results(outputInfo.GetNumElements()); - - ArithmeticFunction<Functor>(inputInfo0.GetShape(), - inputInfo1.GetShape(), - outputInfo.GetShape(), - dequant0.data(), - dequant1.data(), - results.data()); - - Quantize(GetOutputTensorDataU8(0, data), results.data(), outputInfo); -} - -} - -template class armnn::BaseFloat32ArithmeticWorkload<armnn::AdditionQueueDescriptor, std::plus<float>>; -template class armnn::BaseUint8ArithmeticWorkload<armnn::AdditionQueueDescriptor, std::plus<float>>; - -template class armnn::BaseFloat32ArithmeticWorkload<armnn::SubtractionQueueDescriptor, std::minus<float>>; -template class armnn::BaseUint8ArithmeticWorkload<armnn::SubtractionQueueDescriptor, std::minus<float>>; - -template class armnn::BaseFloat32ArithmeticWorkload<armnn::MultiplicationQueueDescriptor, std::multiplies<float>>; -template class armnn::BaseUint8ArithmeticWorkload<armnn::MultiplicationQueueDescriptor, std::multiplies<float>>; - -template class armnn::BaseFloat32ArithmeticWorkload<armnn::DivisionQueueDescriptor, std::divides<float>>; -template class armnn::BaseUint8ArithmeticWorkload<armnn::DivisionQueueDescriptor, std::divides<float>>; diff --git a/src/armnn/backends/RefWorkloads/RefArithmeticWorkload.hpp b/src/armnn/backends/RefWorkloads/RefArithmeticWorkload.hpp deleted file mode 100644 index 7197b7a883..0000000000 --- a/src/armnn/backends/RefWorkloads/RefArithmeticWorkload.hpp +++ /dev/null @@ -1,122 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
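The uint8 path of RefArithmeticWorkload above follows the same three-step pattern as the other Uint8 workloads: dequantize every input to float, run the float reference implementation, then requantize the result. A minimal sketch of the per-element conversions, assuming the usual asymmetric scheme real = scale * (quantized - offset):

#include <algorithm>
#include <cmath>
#include <cstdint>

float DequantizeSketch(uint8_t q, float scale, int32_t offset)
{
    return scale * (static_cast<int32_t>(q) - offset);
}

uint8_t QuantizeSketch(float value, float scale, int32_t offset)
{
    const int32_t q = static_cast<int32_t>(std::round(value / scale)) + offset;
    return static_cast<uint8_t>(std::min(std::max(q, 0), 255));
}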
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <armnn/Types.hpp> -#include "backends/StringMapping.hpp" -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -template <typename Functor, - typename armnn::DataType DataType, - typename ParentDescriptor, - typename armnn::StringMapping::Id DebugString> -class RefArithmeticWorkload -{ - // Needs specialization. The default is empty on purpose. -}; - -template <typename ParentDescriptor, typename Functor> -class BaseFloat32ArithmeticWorkload : public Float32Workload<ParentDescriptor> -{ -public: - using Float32Workload<ParentDescriptor>::Float32Workload; - void ExecuteImpl(const char * debugString) const; -}; - -template <typename Functor, - typename ParentDescriptor, - typename armnn::StringMapping::Id DebugString> -class RefArithmeticWorkload<Functor, armnn::DataType::Float32, ParentDescriptor, DebugString> - : public BaseFloat32ArithmeticWorkload<ParentDescriptor, Functor> -{ -public: - using BaseFloat32ArithmeticWorkload<ParentDescriptor, Functor>::BaseFloat32ArithmeticWorkload; - - virtual void Execute() const override - { - using Parent = BaseFloat32ArithmeticWorkload<ParentDescriptor, Functor>; - Parent::ExecuteImpl(StringMapping::Instance().Get(DebugString)); - } -}; - -template <typename ParentDescriptor, typename Functor> -class BaseUint8ArithmeticWorkload : public Uint8Workload<ParentDescriptor> -{ -public: - using Uint8Workload<ParentDescriptor>::Uint8Workload; - void ExecuteImpl(const char * debugString) const; -}; - -template <typename Functor, - typename ParentDescriptor, - typename armnn::StringMapping::Id DebugString> -class RefArithmeticWorkload<Functor, armnn::DataType::QuantisedAsymm8, ParentDescriptor, DebugString> - : public BaseUint8ArithmeticWorkload<ParentDescriptor, Functor> -{ -public: - using BaseUint8ArithmeticWorkload<ParentDescriptor, Functor>::BaseUint8ArithmeticWorkload; - - virtual void Execute() const override - { - using Parent = BaseUint8ArithmeticWorkload<ParentDescriptor, Functor>; - Parent::ExecuteImpl(StringMapping::Instance().Get(DebugString)); - } -}; - -using RefAdditionFloat32Workload = - RefArithmeticWorkload<std::plus<float>, - DataType::Float32, - AdditionQueueDescriptor, - StringMapping::RefAdditionWorkload_Execute>; - -using RefAdditionUint8Workload = - RefArithmeticWorkload<std::plus<float>, - DataType::QuantisedAsymm8, - AdditionQueueDescriptor, - StringMapping::RefAdditionWorkload_Execute>; - - -using RefSubtractionFloat32Workload = - RefArithmeticWorkload<std::minus<float>, - DataType::Float32, - SubtractionQueueDescriptor, - StringMapping::RefSubtractionWorkload_Execute>; - -using RefSubtractionUint8Workload = - RefArithmeticWorkload<std::minus<float>, - DataType::QuantisedAsymm8, - SubtractionQueueDescriptor, - StringMapping::RefSubtractionWorkload_Execute>; - -using RefMultiplicationFloat32Workload = - RefArithmeticWorkload<std::multiplies<float>, - DataType::Float32, - MultiplicationQueueDescriptor, - StringMapping::RefMultiplicationWorkload_Execute>; - -using RefMultiplicationUint8Workload = - RefArithmeticWorkload<std::multiplies<float>, - DataType::QuantisedAsymm8, - MultiplicationQueueDescriptor, - StringMapping::RefMultiplicationWorkload_Execute>; - -using RefDivisionFloat32Workload = - RefArithmeticWorkload<std::divides<float>, - DataType::Float32, - DivisionQueueDescriptor, - StringMapping::RefDivisionWorkload_Execute>; - -using RefDivisionUint8Workload = - RefArithmeticWorkload<std::divides<float>, - 
DataType::QuantisedAsymm8, - DivisionQueueDescriptor, - StringMapping::RefDivisionWorkload_Execute>; - -} // armnn diff --git a/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.cpp b/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.cpp deleted file mode 100644 index 647677b4fb..0000000000 --- a/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefBaseConstantWorkload.hpp" - -#include "RefWorkloadUtils.hpp" - -#include <armnn/Types.hpp> - -#include <boost/assert.hpp> - -#include <cstring> - -namespace armnn -{ - -template <armnn::DataType DataType> -void RefBaseConstantWorkload<DataType>::Execute() const -{ - // Considering the reference backend independently, it could be possible to initialise the intermediate tensor - // created by the layer output handler at workload construction time, rather than at workload execution time. - // However, this is not an option for other backends (e.g. CL). For consistency, we prefer to align all - // implementations. - // A similar argument can be made about performing the memory copy in the first place (the layer output handler - // could have a non-owning reference to the layer output tensor managed by the const input layer); again, this is - // not an option for other backends, and the extra complexity required to make this work for the reference backend - // may not be worth the effort (skipping a memory copy in the first inference). - if (!m_RanOnce) - { - const ConstantQueueDescriptor& data = this->m_Data; - - BOOST_ASSERT(data.m_LayerOutput != nullptr); - - const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]); - BOOST_ASSERT(data.m_LayerOutput->GetTensorInfo().GetNumBytes() == outputInfo.GetNumBytes()); - - memcpy(GetOutputTensorData<void>(0, data), data.m_LayerOutput->GetConstTensor<void>(), - outputInfo.GetNumBytes()); - - m_RanOnce = true; - } -} - -template class RefBaseConstantWorkload<DataType::Float32>; -template class RefBaseConstantWorkload<DataType::QuantisedAsymm8>; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.hpp b/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.hpp deleted file mode 100644 index 8dc9fd6104..0000000000 --- a/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.hpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -#include <armnn/Types.hpp> - -namespace armnn -{ - -// Base class template providing an implementation of the Constant layer common to all data types. 
-template <armnn::DataType DataType> -class RefBaseConstantWorkload : public TypedWorkload<ConstantQueueDescriptor, DataType> -{ -public: - RefBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) - : TypedWorkload<ConstantQueueDescriptor, DataType>(descriptor, info) - , m_RanOnce(false) - { - } - - virtual void Execute() const override; - -private: - mutable bool m_RanOnce; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp deleted file mode 100644 index 313af9c438..0000000000 --- a/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefBatchNormalizationFloat32Workload.hpp" - -#include "BatchNormImpl.hpp" -#include "RefWorkloadUtils.hpp" - -#include "Profiling.hpp" - -namespace armnn -{ -RefBatchNormalizationFloat32Workload::RefBatchNormalizationFloat32Workload( - const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) - : Float32Workload<BatchNormalizationQueueDescriptor>(descriptor, info), - m_Mean(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Mean))), - m_Variance(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Variance))), - m_Beta(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Beta))), - m_Gamma(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Gamma))) {} - -void RefBatchNormalizationFloat32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationFloat32Workload_Execute"); - - const float* var = m_Variance->GetConstTensor<float>(); - const float* mean = m_Mean->GetConstTensor<float>(); - const float* gamma = m_Gamma->GetConstTensor<float>(); - const float* beta = m_Beta->GetConstTensor<float>(); - - auto inputData = GetInputTensorDataFloat(0, m_Data); - auto outputData = GetOutputTensorDataFloat(0, m_Data); - - BatchNormImpl(m_Data, var, mean, gamma, beta, outputData, inputData); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp deleted file mode 100644 index 15c843c2ca..0000000000 --- a/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
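RefBaseConstantWorkload above copies the constant tensor into the output handle only on the first Execute() call; because Execute() is const, the guard flag has to be mutable. A stripped-down sketch of that run-once pattern (names are illustrative, not part of the ArmNN API):

#include <cstddef>
#include <cstring>

struct RunOnceCopy
{
    void Execute(const void* src, void* dst, std::size_t bytes) const
    {
        if (!m_RanOnce)
        {
            std::memcpy(dst, src, bytes); // the first call pays for the copy
            m_RanOnce = true;             // later calls are no-ops
        }
    }

    mutable bool m_RanOnce = false;
};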
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefBatchNormalizationFloat32Workload : public Float32Workload<BatchNormalizationQueueDescriptor> -{ -public: - explicit RefBatchNormalizationFloat32Workload(const BatchNormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info); - virtual void Execute() const override; - -private: - std::unique_ptr<ScopedCpuTensorHandle> m_Mean; - std::unique_ptr<ScopedCpuTensorHandle> m_Variance; - std::unique_ptr<ScopedCpuTensorHandle> m_Beta; - std::unique_ptr<ScopedCpuTensorHandle> m_Gamma; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp deleted file mode 100644 index e248ad4b9d..0000000000 --- a/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefBatchNormalizationUint8Workload.hpp" - -#include "BatchNormImpl.hpp" -#include "RefWorkloadUtils.hpp" - -#include "Profiling.hpp" - -#include <vector> - -namespace armnn -{ -RefBatchNormalizationUint8Workload::RefBatchNormalizationUint8Workload( - const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) - : Uint8Workload<BatchNormalizationQueueDescriptor>(descriptor, info), - m_Mean(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Mean))), - m_Variance(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Variance))), - m_Beta(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Beta))), - m_Gamma(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Gamma))) {} - -void RefBatchNormalizationUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationUint8Workload_Execute"); - - const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& varInfo = GetTensorInfo(m_Variance.get()); - const TensorInfo& meanInfo = GetTensorInfo(m_Mean.get()); - const TensorInfo& gammaInfo = GetTensorInfo(m_Gamma.get()); - const TensorInfo& betaInfo = GetTensorInfo(m_Beta.get()); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); - - auto input = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo0); - auto var = Dequantize(m_Variance->GetConstTensor<uint8_t>(), varInfo); - auto mean = Dequantize(m_Mean->GetConstTensor<uint8_t>(), meanInfo); - auto gamma = Dequantize(m_Gamma->GetConstTensor<uint8_t>(), gammaInfo); - auto beta = Dequantize(m_Beta->GetConstTensor<uint8_t>(), betaInfo); - - std::vector<float> results(outputInfo.GetNumElements()); - BatchNormImpl(m_Data, var.data(), mean.data(), gamma.data(), beta.data(), results.data(), input.data()); - Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp deleted file mode 100644 index d3e8e0a120..0000000000 --- a/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefBatchNormalizationUint8Workload : public Uint8Workload<BatchNormalizationQueueDescriptor> -{ -public: - explicit RefBatchNormalizationUint8Workload(const BatchNormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info); - virtual void Execute() const override; - -private: - std::unique_ptr<ScopedCpuTensorHandle> m_Mean; - std::unique_ptr<ScopedCpuTensorHandle> m_Variance; - std::unique_ptr<ScopedCpuTensorHandle> m_Beta; - std::unique_ptr<ScopedCpuTensorHandle> m_Gamma; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConstantFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefConstantFloat32Workload.cpp deleted file mode 100644 index 074e8ccaae..0000000000 --- a/src/armnn/backends/RefWorkloads/RefConstantFloat32Workload.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefConstantFloat32Workload.hpp" - -#include "Profiling.hpp" - -namespace armnn -{ - -void RefConstantFloat32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConstantFloat32Workload_Execute"); - RefBaseConstantWorkload::Execute(); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConstantFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefConstantFloat32Workload.hpp deleted file mode 100644 index 76e3a42026..0000000000 --- a/src/armnn/backends/RefWorkloads/RefConstantFloat32Workload.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "RefBaseConstantWorkload.hpp" - -namespace armnn -{ - -class RefConstantFloat32Workload : public RefBaseConstantWorkload<DataType::Float32> -{ -public: - using RefBaseConstantWorkload<DataType::Float32>::RefBaseConstantWorkload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConstantUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefConstantUint8Workload.cpp deleted file mode 100644 index 07e4719d54..0000000000 --- a/src/armnn/backends/RefWorkloads/RefConstantUint8Workload.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefConstantUint8Workload.hpp" - -#include "Profiling.hpp" - -namespace armnn -{ - -void RefConstantUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConstantUint8Workload_Execute"); - RefBaseConstantWorkload::Execute(); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConstantUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefConstantUint8Workload.hpp deleted file mode 100644 index 02552ac80b..0000000000 --- a/src/armnn/backends/RefWorkloads/RefConstantUint8Workload.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "RefBaseConstantWorkload.hpp" - -namespace armnn -{ - -class RefConstantUint8Workload : public RefBaseConstantWorkload<DataType::QuantisedAsymm8> -{ -public: - using RefBaseConstantWorkload<DataType::QuantisedAsymm8>::RefBaseConstantWorkload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.cpp b/src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.cpp deleted file mode 100644 index e148bf6a9d..0000000000 --- a/src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefConvertFp16ToFp32Workload.hpp" -#include "Half.hpp" -#include "RefWorkloadUtils.hpp" -#include "FloatingPointConverter.hpp" - -namespace armnn -{ - -void RefConvertFp16ToFp32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp16ToFp32Workload_Execute"); - - const Half* const input = GetInputTensorDataHalf(0, m_Data); - float* const output = GetOutputTensorDataFloat(0, m_Data); - - unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); - armnnUtils::FloatingPointConverter::ConvertFloat16To32(input, numElements, output); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.hpp b/src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.hpp deleted file mode 100644 index 5e841ba34f..0000000000 --- a/src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefConvertFp16ToFp32Workload : public Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor> -{ -public: - using Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>::Float16ToFloat32Workload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.cpp b/src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.cpp deleted file mode 100644 index efaaf8e1ad..0000000000 --- a/src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.cpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
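The conversion workloads above and below delegate to armnnUtils::FloatingPointConverter, passing raw buffer pointers and an element count. A small round-trip sketch using the converters with the same buffer types the workloads pass (hypothetical values, all exactly representable in fp16):

#include "Half.hpp"
#include "FloatingPointConverter.hpp"

void RoundTripSketch()
{
    const float src[3] = { 1.0f, 0.5f, -2.0f };
    armnn::Half asHalf[3];
    float back[3];

    armnnUtils::FloatingPointConverter::ConvertFloat32To16(src, 3, asHalf);
    armnnUtils::FloatingPointConverter::ConvertFloat16To32(asHalf, 3, back);
    // back now holds the original values after an fp32 -> fp16 -> fp32 round trip.
}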
-// SPDX-License-Identifier: MIT -// - -#include "RefConvertFp32ToFp16Workload.hpp" - -#include "Half.hpp" -#include "FloatingPointConverter.hpp" -#include "RefWorkloadUtils.hpp" - -#include "Profiling.hpp" - -namespace armnn -{ - -void RefConvertFp32ToFp16Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp32ToFp16Workload_Execute"); - - const float* const input = GetInputTensorDataFloat(0, m_Data); - Half* const output = GetOutputTensorDataHalf(0, m_Data); - - // convert Fp32 input to Fp16 output - unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); - armnnUtils::FloatingPointConverter::ConvertFloat32To16(input, numElements, output); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.hpp b/src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.hpp deleted file mode 100644 index 0754fd5c79..0000000000 --- a/src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefConvertFp32ToFp16Workload : public Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor> -{ -public: - using Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>::Float32ToFloat16Workload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp deleted file mode 100644 index 20905646d7..0000000000 --- a/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp +++ /dev/null @@ -1,37 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefConvolution2dFloat32Workload.hpp" - -#include "ConvImpl.hpp" -#include "RefWorkloadUtils.hpp" - -#include "Profiling.hpp" - -namespace armnn -{ -RefConvolution2dFloat32Workload::RefConvolution2dFloat32Workload( - const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) - : Float32Workload<Convolution2dQueueDescriptor>(descriptor, info), - m_Weight(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight))), - m_Bias(descriptor.m_Parameters.m_BiasEnabled - ? std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias)) : nullptr) {} - -void RefConvolution2dFloat32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dFloat32Workload_Execute"); - - float* outputData = GetOutputTensorDataFloat(0, m_Data); - const float* inputData = GetInputTensorDataFloat(0, m_Data); - const float* weightData = m_Weight->template GetConstTensor<float>(); - const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? 
- m_Bias->template GetConstTensor<float>() : nullptr; - const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); - - ConvImpl<armnn::Convolution2dQueueDescriptor, float, float, float>( - m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp deleted file mode 100644 index 34489e807c..0000000000 --- a/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp +++ /dev/null @@ -1,27 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefConvolution2dFloat32Workload : public Float32Workload<Convolution2dQueueDescriptor> -{ -public: - explicit RefConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info); - virtual void Execute() const override; - -private: - std::unique_ptr<ScopedCpuTensorHandle> m_Weight; - std::unique_ptr<ScopedCpuTensorHandle> m_Bias; - -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp deleted file mode 100644 index 881e9bf6b0..0000000000 --- a/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefConvolution2dUint8Workload.hpp" - -#include "ConvImpl.hpp" -#include "RefWorkloadUtils.hpp" - -#include "Profiling.hpp" - -namespace armnn -{ -RefConvolution2dUint8Workload::RefConvolution2dUint8Workload( - const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) - : Uint8Workload<Convolution2dQueueDescriptor>(descriptor, info), - m_Weight(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight))), - m_Bias(descriptor.m_Parameters.m_BiasEnabled - ? std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias)) : nullptr) {} - -void RefConvolution2dUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dUint8Workload_Execute"); - - const uint8_t* inputData = GetInputTensorDataU8(0, m_Data); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const uint8_t* weightsData = m_Weight->template GetConstTensor<uint8_t>(); - const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get()); - const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? 
- m_Bias->template GetConstTensor<int32_t>() : - nullptr; - uint8_t* outputData = GetOutputTensorDataU8(0, m_Data); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); - const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); - - ConvImpl<armnn::Convolution2dQueueDescriptor, uint8_t, int32_t, int32_t>( - m_Data, - inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), - weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(), - biasData, - outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp deleted file mode 100644 index 0e2dd6aada..0000000000 --- a/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefConvolution2dUint8Workload : public Uint8Workload<Convolution2dQueueDescriptor> -{ -public: - explicit RefConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info); - - virtual void Execute() const override; - -private: - std::unique_ptr<ScopedCpuTensorHandle> m_Weight; - std::unique_ptr<ScopedCpuTensorHandle> m_Bias; - -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp deleted file mode 100644 index e89013b9bd..0000000000 --- a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp +++ /dev/null @@ -1,37 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefDepthwiseConvolution2dFloat32Workload.hpp" - -#include "ConvImpl.hpp" -#include "RefWorkloadUtils.hpp" - -#include "Profiling.hpp" - -namespace armnn -{ -RefDepthwiseConvolution2dFloat32Workload::RefDepthwiseConvolution2dFloat32Workload( - const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) - : Float32Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info), - m_Weight(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight))), - m_Bias(descriptor.m_Parameters.m_BiasEnabled - ? std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias)) : nullptr) {} - -void RefDepthwiseConvolution2dFloat32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dFloat32Workload_Execute"); - - float* outputData = GetOutputTensorDataFloat(0, m_Data); - const float* inputData = GetInputTensorDataFloat(0, m_Data); - const float* weightData = m_Weight->template GetConstTensor<float>(); - const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? 
- m_Bias->template GetConstTensor<float>() : nullptr; - const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); - - ConvImpl<armnn::DepthwiseConvolution2dQueueDescriptor, float, float, float> - (m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo, true); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp deleted file mode 100644 index 8f1227e2de..0000000000 --- a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp +++ /dev/null @@ -1,27 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefDepthwiseConvolution2dFloat32Workload : public Float32Workload<DepthwiseConvolution2dQueueDescriptor> -{ -public: - explicit RefDepthwiseConvolution2dFloat32Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info); - - virtual void Execute() const override; - -private: - std::unique_ptr<ScopedCpuTensorHandle> m_Weight; - std::unique_ptr<ScopedCpuTensorHandle> m_Bias; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp deleted file mode 100644 index e8e501d6ae..0000000000 --- a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefDepthwiseConvolution2dUint8Workload.hpp" - -#include "ConvImpl.hpp" -#include "RefWorkloadUtils.hpp" - -#include "Profiling.hpp" - -namespace armnn -{ - -RefDepthwiseConvolution2dUint8Workload::RefDepthwiseConvolution2dUint8Workload( - const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) - : Uint8Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info), - m_Weight(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight))), - m_Bias(descriptor.m_Parameters.m_BiasEnabled - ? std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias)) : nullptr) {} - -void RefDepthwiseConvolution2dUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dUint8Workload_Execute"); - - const uint8_t* inputData = GetInputTensorDataU8(0, m_Data); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const uint8_t* weightsData = m_Weight->template GetConstTensor<uint8_t>(); - const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get()); - const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? 
- m_Bias->template GetConstTensor<int32_t>() : - nullptr; - uint8_t* outputData = GetOutputTensorDataU8(0, m_Data); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); - const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); - - ConvImpl<armnn::DepthwiseConvolution2dQueueDescriptor, uint8_t, int32_t, int32_t>( - m_Data, - inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), - weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(), - biasData, - outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo, true); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp deleted file mode 100644 index c615cf7880..0000000000 --- a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp +++ /dev/null @@ -1,26 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefDepthwiseConvolution2dUint8Workload : public Uint8Workload<DepthwiseConvolution2dQueueDescriptor> -{ -public: - explicit RefDepthwiseConvolution2dUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info); - virtual void Execute() const override; - -private: - std::unique_ptr<ScopedCpuTensorHandle> m_Weight; - std::unique_ptr<ScopedCpuTensorHandle> m_Bias; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.cpp deleted file mode 100644 index 3e16f60b11..0000000000 --- a/src/armnn/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefFakeQuantizationFloat32Workload.hpp" - -#include "RefWorkloadUtils.hpp" - -#include "Profiling.hpp" - -#include <boost/numeric/conversion/cast.hpp> - -namespace armnn -{ - -void FakeQuantization(const float* inputData, float* outputData, uint32_t numElements, float min, float max) -{ - float scale = (max - min) / 255.f; - int32_t offset = boost::numeric_cast<int32_t>((-min * 255.f) / (max - min)); - - for (uint32_t i = 0; i < numElements; i++) - { - outputData[i] = static_cast<float>(armnn::Quantize<uint8_t>(inputData[i], scale, offset)); - } - -} - -void RefFakeQuantizationFloat32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFakeQuantizationFloat32Workload_Execute"); - - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - - const float* inputData = GetInputTensorDataFloat(0, m_Data); - float* outputData = GetOutputTensorDataFloat(0, m_Data); - FakeQuantization(inputData, outputData, inputInfo.GetNumElements(), - m_Data.m_Parameters.m_Min, - m_Data.m_Parameters.m_Max); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.hpp deleted file mode 100644 index 523fdcff50..0000000000 --- a/src/armnn/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefFakeQuantizationFloat32Workload : public Float32Workload<FakeQuantizationQueueDescriptor> -{ -public: - using Float32Workload<FakeQuantizationQueueDescriptor>::Float32Workload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefFloorFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefFloorFloat32Workload.cpp deleted file mode 100644 index cc1f8800dc..0000000000 --- a/src/armnn/backends/RefWorkloads/RefFloorFloat32Workload.cpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefFloorFloat32Workload.hpp" - -#include "RefWorkloadUtils.hpp" - -#include "Profiling.hpp" - -namespace armnn -{ - -void RefFloorFloat32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFloorFloat32Workload_Execute"); - - const float* const input = GetInputTensorDataFloat(0, m_Data); - float* const output = GetOutputTensorDataFloat(0, m_Data); - - unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); - for (unsigned int i = 0; i < numElements; ++i) - { - output[i] = floorf(input[i]); - } -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefFloorFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefFloorFloat32Workload.hpp deleted file mode 100644 index d7cfa50365..0000000000 --- a/src/armnn/backends/RefWorkloads/RefFloorFloat32Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefFloorFloat32Workload : public Float32Workload<FloorQueueDescriptor> -{ -public: - using Float32Workload<FloorQueueDescriptor>::Float32Workload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp deleted file mode 100644 index ccaf4cd87b..0000000000 --- a/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefFullyConnectedFloat32Workload.hpp" - -#include "FullyConnected.hpp" -#include "RefWorkloadUtils.hpp" - -#include "Profiling.hpp" - -namespace armnn -{ -RefFullyConnectedFloat32Workload::RefFullyConnectedFloat32Workload( - const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) - : Float32Workload<FullyConnectedQueueDescriptor>(descriptor, info), - m_Weight(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight))), - m_Bias(descriptor.m_Parameters.m_BiasEnabled - ? 
std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias)) : nullptr) {} - -void RefFullyConnectedFloat32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFullyConnectedFloat32Workload_Execute"); - - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); - - float* outputData = GetOutputTensorDataFloat(0, m_Data); - const float* inputData = GetInputTensorDataFloat(0, m_Data); - const float* weightData = m_Weight->GetConstTensor<float>(); - const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->GetConstTensor<float>() : nullptr; - - FullyConnected(inputData, - outputData, - inputInfo, - outputInfo, - weightData, - biasData, - m_Data.m_Parameters.m_TransposeWeightMatrix); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp deleted file mode 100644 index ce058690ac..0000000000 --- a/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp +++ /dev/null @@ -1,26 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefFullyConnectedFloat32Workload : public Float32Workload<FullyConnectedQueueDescriptor> -{ -public: - explicit RefFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor, - const WorkloadInfo& info); - virtual void Execute() const override; - -private: - std::unique_ptr<ScopedCpuTensorHandle> m_Weight; - std::unique_ptr<ScopedCpuTensorHandle> m_Bias; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp deleted file mode 100644 index cd785d786c..0000000000 --- a/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefFullyConnectedUint8Workload.hpp" - -#include "FullyConnected.hpp" -#include "RefWorkloadUtils.hpp" - -#include "Profiling.hpp" - -#include <vector> - -namespace armnn -{ -RefFullyConnectedUint8Workload::RefFullyConnectedUint8Workload( - const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) - : Uint8Workload<FullyConnectedQueueDescriptor>(descriptor, info), - m_Weight(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight))), - m_Bias(descriptor.m_Parameters.m_BiasEnabled - ? 
std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias)) : nullptr) {} - -void RefFullyConnectedUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFullyConnectedUint8Workload_Execute"); - - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); - - const uint8_t* weightData = m_Weight->GetConstTensor<uint8_t>(); - - auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo); - - auto weight = Dequantize(weightData, m_Weight->GetTensorInfo()); - - std::vector<float> results(outputInfo.GetNumElements()); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - const int32_t* biasData = m_Bias->GetConstTensor<int32_t>(); - auto bias = Dequantize(biasData, m_Bias->GetTensorInfo()); - - FullyConnected(dequant.data(), - results.data(), - inputInfo, - outputInfo, - weight.data(), - bias.data(), - m_Data.m_Parameters.m_TransposeWeightMatrix); - } - else - { - FullyConnected(dequant.data(), - results.data(), - inputInfo, - outputInfo, - weight.data(), - nullptr, - m_Data.m_Parameters.m_TransposeWeightMatrix); - } - - Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp deleted file mode 100644 index e489cc7d81..0000000000 --- a/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp +++ /dev/null @@ -1,26 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefFullyConnectedUint8Workload : public Uint8Workload<FullyConnectedQueueDescriptor> -{ -public: - explicit RefFullyConnectedUint8Workload(const FullyConnectedQueueDescriptor& descriptor, - const WorkloadInfo& info); - virtual void Execute() const override; - -private: - std::unique_ptr<ScopedCpuTensorHandle> m_Weight; - std::unique_ptr<ScopedCpuTensorHandle> m_Bias; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.cpp deleted file mode 100644 index 973c87b009..0000000000 --- a/src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.cpp +++ /dev/null @@ -1,61 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "RefL2NormalizationFloat32Workload.hpp" - -#include "RefWorkloadUtils.hpp" -#include "TensorBufferArrayView.hpp" - -#include "Profiling.hpp" - -#include <cmath> - -namespace armnn -{ - -void RefL2NormalizationFloat32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefL2NormalizationFloat32Workload_Execute"); - - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); - - TensorBufferArrayView<const float> input(inputInfo.GetShape(), GetInputTensorDataFloat(0, m_Data)); - TensorBufferArrayView<float> output(outputInfo.GetShape(), GetOutputTensorDataFloat(0, m_Data)); - - const unsigned int batchSize = inputInfo.GetShape()[0]; - const unsigned int depth = inputInfo.GetShape()[1]; - const unsigned int rows = inputInfo.GetShape()[2]; - const unsigned int cols = inputInfo.GetShape()[3]; - - for (unsigned int n = 0; n < batchSize; ++n) - { - for (unsigned int d = 0; d < depth; ++d) - { - for (unsigned int h = 0; h < rows; ++h) - { - for (unsigned int w = 0; w < cols; ++w) - { - float reduction = 0.0; - for (unsigned int c = 0; c < depth; ++c) - { - const float value = input.Get(n, c, h, w); - reduction += value * value; - } - - // Using std::max(reduction, epsilon) below would prevent against division by 0. - // However, at the time of writing: - // - This is not supported by the ACL functions used to implement L2Normalization in the CL - // backend. - // - The reference semantics for this operator do not include this parameter. - const float scale = 1.0f / sqrtf(reduction); - output.Get(n, d, h, w) = input.Get(n, d, h, w) * scale; - } - } - } - } -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.hpp deleted file mode 100644 index a3f03f3060..0000000000 --- a/src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefL2NormalizationFloat32Workload : public Float32Workload<L2NormalizationQueueDescriptor> -{ -public: - using Float32Workload<L2NormalizationQueueDescriptor>::Float32Workload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.cpp deleted file mode 100644 index 50ff605701..0000000000 --- a/src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefLstmFloat32Workload.hpp" - -namespace armnn -{ - -void RefLstmFloat32Workload::Execute() const -{ - throw armnn::Exception("No implementation of Lstm in the Ref backend!"); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.hpp deleted file mode 100644 index fc4f7776c6..0000000000 --- a/src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefLstmFloat32Workload : public Float32Workload<LstmQueueDescriptor> -{ -public: - using Float32Workload<LstmQueueDescriptor>::Float32Workload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefMergerFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefMergerFloat32Workload.cpp deleted file mode 100644 index b1f8a32ee7..0000000000 --- a/src/armnn/backends/RefWorkloads/RefMergerFloat32Workload.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefMergerFloat32Workload.hpp" - -#include "Merger.hpp" - -#include "Profiling.hpp" - -namespace armnn -{ - -void RefMergerFloat32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMergerFloat32Workload_Execute"); - Merger<float>(m_Data); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefMergerFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefMergerFloat32Workload.hpp deleted file mode 100644 index 23a523c852..0000000000 --- a/src/armnn/backends/RefWorkloads/RefMergerFloat32Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefMergerFloat32Workload : public Float32Workload<MergerQueueDescriptor> -{ -public: - using Float32Workload<MergerQueueDescriptor>::Float32Workload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefMergerUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefMergerUint8Workload.cpp deleted file mode 100644 index 47ce1cf731..0000000000 --- a/src/armnn/backends/RefWorkloads/RefMergerUint8Workload.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefMergerUint8Workload.hpp" - -#include "Merger.hpp" - -#include "Profiling.hpp" - -namespace armnn -{ - -void RefMergerUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMergerUint8Workload_Execute"); - Merger<uint8_t>(m_Data); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefMergerUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefMergerUint8Workload.hpp deleted file mode 100644 index 65dc42120a..0000000000 --- a/src/armnn/backends/RefWorkloads/RefMergerUint8Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefMergerUint8Workload : public Uint8Workload<MergerQueueDescriptor> -{ -public: - using Uint8Workload<MergerQueueDescriptor>::Uint8Workload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp deleted file mode 100644 index 5c24416624..0000000000 --- a/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp +++ /dev/null @@ -1,185 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "RefNormalizationFloat32Workload.hpp" - -#include "RefWorkloadUtils.hpp" - -#include "Profiling.hpp" - -#include <armnn/Tensor.hpp> - -#include <boost/log/trivial.hpp> -#include <boost/numeric/conversion/cast.hpp> - -namespace armnn -{ - -// Helper function to compute "Within" normalization using Krichevsky 2012: Local Brightness Normalization. -static void NormalizeWithinUingLbr(const float* inputData, - float* outputData, - const TensorShape& tensorShape, - uint32_t norm_size, - float alpha, - float beta, - float kappa) -{ - const unsigned int batchSize = tensorShape[0]; - const unsigned int depth = tensorShape[1]; - const unsigned int rows = tensorShape[2]; - const unsigned int cols = tensorShape[3]; - - int radius = boost::numeric_cast<int>(norm_size / 2u); /* Strong Assumption on rounding Mode */ - - for (unsigned int n = 0; n < batchSize; n++) - { - for (unsigned int c = 0; c < depth; c++) - { - for (unsigned int h = 0; h < rows; h++) - { - for (unsigned int w = 0; w < cols; w++) - { - float accumulated_scale = 0.0; - for (int y = -radius; y <= radius; y++) - { - for (int x = -radius; x <= radius; x++) - { - int i = boost::numeric_cast<int>(w) + x; - int j = boost::numeric_cast<int>(h) + y; - - if ((i < 0) || (i >= boost::numeric_cast<int>(cols))) - { - continue; - } - - if ((j < 0) || (j >= boost::numeric_cast<int>(rows))) - { - continue; - } - - float inval = inputData[n * cols * rows * depth + - c * cols * rows + - boost::numeric_cast<unsigned int>(j) * cols + - boost::numeric_cast<unsigned int>(i)]; - - accumulated_scale += inval*inval; - } - } - outputData[n * cols * rows * depth + - c * cols * rows + - h * cols + - w] = inputData[n * cols * rows * depth + - c * cols * rows + - h * cols + - w] / (powf((kappa + (accumulated_scale * alpha)), beta)); - } - } - } - } -} - -// Helper function to compute "Across" normalization using Krichevsky 2012: Local Brightness Normalization. 
-void NormalizeAcrossUingLbr(const float* inputData, - float* outputData, - const TensorShape& tensorShape, - uint32_t norm_size, - float alpha, - float beta, - float kappa) -{ - const unsigned int batchSize = tensorShape[0]; - const unsigned int depth = tensorShape[1]; - const unsigned int rows = tensorShape[2]; - const unsigned int cols = tensorShape[3]; - - int radius = boost::numeric_cast<int>(norm_size / 2u); /* Strong Assumption on rounding Mode */ - - for (unsigned int n = 0; n < batchSize; n++) - { - for (unsigned int c = 0; c < depth; c++) - { - for (unsigned int h = 0; h < rows; h++) - { - for (unsigned int w = 0; w < cols; w++) - { - float accumulated_scale = 0.0; - for (int z = -radius; z <= radius; z++) - { - int k = boost::numeric_cast<int>(c) + z; - - if ((k < 0) || (k >= boost::numeric_cast<int>(depth))) - { - continue; - } - - float inval = inputData[n * cols * rows * depth + - boost::numeric_cast<unsigned int>(k) * cols * rows + - h * cols + - w]; - - accumulated_scale += inval*inval; - } - float scale = kappa + (accumulated_scale * alpha); - scale = powf(scale, -beta); - outputData[n * cols * rows * depth + - c * cols * rows + - h * cols + - w] = scale * - inputData[n * cols * rows * depth + - c * cols * rows + - h * cols + - w]; - } - } - } - } -} - -void RefNormalizationFloat32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefNormalizationFloat32Workload_Execute"); - - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - - float* outputData = GetOutputTensorDataFloat(0, m_Data); - const float* inputData = GetInputTensorDataFloat(0, m_Data); - - - if (NormalizationAlgorithmMethod::LocalBrightness == m_Data.m_Parameters.m_NormMethodType) - { - if (NormalizationAlgorithmChannel::Within == m_Data.m_Parameters.m_NormChannelType) - { - NormalizeWithinUingLbr(inputData, - outputData, - inputInfo.GetShape(), - m_Data.m_Parameters.m_NormSize, - m_Data.m_Parameters.m_Alpha, - m_Data.m_Parameters.m_Beta, - m_Data.m_Parameters.m_K); - } - else if (NormalizationAlgorithmChannel::Across == m_Data.m_Parameters.m_NormChannelType) - { - NormalizeAcrossUingLbr(inputData, - outputData, - inputInfo.GetShape(), - m_Data.m_Parameters.m_NormSize, - m_Data.m_Parameters.m_Alpha, - m_Data.m_Parameters.m_Beta, - m_Data.m_Parameters.m_K); - } - else - { - BOOST_LOG_TRIVIAL(warning) << "Illegal NORMALIZATION mode in normalization_f32"; - return; - } - } - else - { - BOOST_LOG_TRIVIAL(warning) << "Lcr method (Jarret 2009: Local Contrast Normalization) not supported yet."; - return; - } -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.hpp deleted file mode 100644 index e30356c422..0000000000 --- a/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefNormalizationFloat32Workload : public Float32Workload<NormalizationQueueDescriptor> -{ -public: - using Float32Workload<NormalizationQueueDescriptor>::Float32Workload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp b/src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp deleted file mode 100644 index 4093ff38f4..0000000000 --- a/src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp +++ /dev/null @@ -1,32 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefPermuteWorkload.hpp" -#include "RefWorkloadUtils.hpp" - -#include <Permute.hpp> -#include "TypeUtils.hpp" - -namespace armnn -{ - -template <armnn::DataType DataType> -void RefPermuteWorkload<DataType>::Execute() const -{ - using T = ResolveType<DataType>; - - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, GetName() + "_Execute"); - - const ITensorHandle* src = m_Data.m_Inputs[0]; - const ITensorHandle* dst = m_Data.m_Outputs[0]; - const PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings; - - armnnUtils::Permute(GetTensorInfo(dst).GetShape(), mappings, GetConstCpuData<T>(src), GetCpuData<T>(dst)); -} - -template class RefPermuteWorkload<DataType::Float32>; -template class RefPermuteWorkload<DataType::QuantisedAsymm8>; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefPermuteWorkload.hpp b/src/armnn/backends/RefWorkloads/RefPermuteWorkload.hpp deleted file mode 100644 index d72cf77e74..0000000000 --- a/src/armnn/backends/RefWorkloads/RefPermuteWorkload.hpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <armnn/TypesUtils.hpp> - -namespace armnn -{ - -template <armnn::DataType DataType> -class RefPermuteWorkload : public TypedWorkload<PermuteQueueDescriptor, DataType> -{ -public: - static const std::string& GetName() - { - static const std::string name = std::string("RefPermute") + GetDataTypeName(DataType) + "Workload"; - return name; - } - - using TypedWorkload<PermuteQueueDescriptor, DataType>::m_Data; - using TypedWorkload<PermuteQueueDescriptor, DataType>::TypedWorkload; - void Execute() const override; -}; - -using RefPermuteFloat32Workload = RefPermuteWorkload<DataType::Float32>; -using RefPermuteUint8Workload = RefPermuteWorkload<DataType::QuantisedAsymm8>; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefPooling2dFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefPooling2dFloat32Workload.cpp deleted file mode 100644 index 2542756c26..0000000000 --- a/src/armnn/backends/RefWorkloads/RefPooling2dFloat32Workload.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "RefPooling2dFloat32Workload.hpp" - -#include "Pooling2d.hpp" -#include "RefWorkloadUtils.hpp" - -#include "Profiling.hpp" - -namespace armnn -{ - -void RefPooling2dFloat32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPooling2dFloat32Workload_Execute"); - - const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo0 = GetTensorInfo(m_Data.m_Outputs[0]); - - float* outputData = GetOutputTensorDataFloat(0, m_Data); - const float* inputData = GetInputTensorDataFloat(0, m_Data); - - Pooling2d(inputData, - outputData, - inputInfo0, - outputInfo0, - m_Data.m_Parameters); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefPooling2dFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefPooling2dFloat32Workload.hpp deleted file mode 100644 index 501fb71aff..0000000000 --- a/src/armnn/backends/RefWorkloads/RefPooling2dFloat32Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefPooling2dFloat32Workload : public Float32Workload<Pooling2dQueueDescriptor> -{ -public: - using Float32Workload<Pooling2dQueueDescriptor>::Float32Workload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefPooling2dUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefPooling2dUint8Workload.cpp deleted file mode 100644 index 91fdf291ee..0000000000 --- a/src/armnn/backends/RefWorkloads/RefPooling2dUint8Workload.cpp +++ /dev/null @@ -1,37 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefPooling2dUint8Workload.hpp" - -#include "Pooling2d.hpp" -#include "RefWorkloadUtils.hpp" - -#include "Profiling.hpp" - -#include <vector> - -namespace armnn -{ - -void RefPooling2dUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPooling2dUint8Workload_Execute"); - - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); - - auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo); - - std::vector<float> results(outputInfo.GetNumElements()); - Pooling2d(dequant.data(), - results.data(), - inputInfo, - outputInfo, - m_Data.m_Parameters); - - Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefPooling2dUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefPooling2dUint8Workload.hpp deleted file mode 100644 index 6544f9a785..0000000000 --- a/src/armnn/backends/RefWorkloads/RefPooling2dUint8Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefPooling2dUint8Workload : public Uint8Workload<Pooling2dQueueDescriptor> -{ -public: - using Uint8Workload<Pooling2dQueueDescriptor>::Uint8Workload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefReshapeFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefReshapeFloat32Workload.cpp deleted file mode 100644 index 99c94a49a1..0000000000 --- a/src/armnn/backends/RefWorkloads/RefReshapeFloat32Workload.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefReshapeFloat32Workload.hpp" - -#include "RefWorkloadUtils.hpp" - -#include "Profiling.hpp" - -#include <cstring> - -namespace armnn -{ - -void RefReshapeFloat32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReshapeFloat32Workload_Execute"); - - void* output = GetOutputTensorData<void>(0, m_Data); - const void* input = GetInputTensorData<void>(0, m_Data); - unsigned int numBytes = GetTensorInfo(m_Data.m_Inputs[0]).GetNumBytes(); - memcpy(output, input, numBytes); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefReshapeFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefReshapeFloat32Workload.hpp deleted file mode 100644 index 9281e89cf7..0000000000 --- a/src/armnn/backends/RefWorkloads/RefReshapeFloat32Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefReshapeFloat32Workload : public Float32Workload<ReshapeQueueDescriptor> -{ -public: - using Float32Workload<ReshapeQueueDescriptor>::Float32Workload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefReshapeUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefReshapeUint8Workload.cpp deleted file mode 100644 index 8f475f3db3..0000000000 --- a/src/armnn/backends/RefWorkloads/RefReshapeUint8Workload.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefReshapeUint8Workload.hpp" - -#include "RefWorkloadUtils.hpp" - -#include "Profiling.hpp" - -#include <cstring> - -namespace armnn -{ - -void RefReshapeUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReshapeUint8Workload_Execute"); - - void* output = GetOutputTensorData<void>(0, m_Data); - const void* input = GetInputTensorData<void>(0, m_Data); - unsigned int numBytes = GetTensorInfo(m_Data.m_Inputs[0]).GetNumBytes(); - memcpy(output, input, numBytes); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefReshapeUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefReshapeUint8Workload.hpp deleted file mode 100644 index b37fb4bdeb..0000000000 --- a/src/armnn/backends/RefWorkloads/RefReshapeUint8Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefReshapeUint8Workload : public Uint8Workload<ReshapeQueueDescriptor> -{ -public: - using Uint8Workload<ReshapeQueueDescriptor>::Uint8Workload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefResizeBilinearFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefResizeBilinearFloat32Workload.cpp deleted file mode 100644 index 50ee7a218a..0000000000 --- a/src/armnn/backends/RefWorkloads/RefResizeBilinearFloat32Workload.cpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefResizeBilinearFloat32Workload.hpp" - -#include "RefWorkloadUtils.hpp" -#include "ResizeBilinear.hpp" - -#include "Profiling.hpp" - -namespace armnn -{ - -void RefResizeBilinearFloat32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefResizeBilinearFloat32Workload_Execute"); - - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); - - ResizeBilinear(GetInputTensorDataFloat(0, m_Data), - inputInfo, - GetOutputTensorDataFloat(0, m_Data), - outputInfo); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefResizeBilinearFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefResizeBilinearFloat32Workload.hpp deleted file mode 100644 index 0fff7ee695..0000000000 --- a/src/armnn/backends/RefWorkloads/RefResizeBilinearFloat32Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefResizeBilinearFloat32Workload : public Float32Workload<ResizeBilinearQueueDescriptor> -{ -public: - using Float32Workload<ResizeBilinearQueueDescriptor>::Float32Workload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefResizeBilinearUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefResizeBilinearUint8Workload.cpp deleted file mode 100644 index 67ab039ef3..0000000000 --- a/src/armnn/backends/RefWorkloads/RefResizeBilinearUint8Workload.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "RefResizeBilinearUint8Workload.hpp" - -#include "RefWorkloadUtils.hpp" -#include "ResizeBilinear.hpp" - -#include "Profiling.hpp" - -#include <vector> - -namespace armnn -{ - -void RefResizeBilinearUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefResizeBilinearUint8Workload_Execute"); - - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); - - auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo); - - std::vector<float> results(outputInfo.GetNumElements()); - ResizeBilinear(dequant.data(), inputInfo, results.data(), outputInfo); - - Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefResizeBilinearUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefResizeBilinearUint8Workload.hpp deleted file mode 100644 index bbaf899ca6..0000000000 --- a/src/armnn/backends/RefWorkloads/RefResizeBilinearUint8Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefResizeBilinearUint8Workload : public Uint8Workload<ResizeBilinearQueueDescriptor> -{ -public: - using Uint8Workload<ResizeBilinearQueueDescriptor>::Uint8Workload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefSoftmaxFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefSoftmaxFloat32Workload.cpp deleted file mode 100644 index 1f519bda10..0000000000 --- a/src/armnn/backends/RefWorkloads/RefSoftmaxFloat32Workload.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefSoftmaxFloat32Workload.hpp" - -#include "RefWorkloadUtils.hpp" -#include "Softmax.hpp" - -#include "Profiling.hpp" - -namespace armnn -{ - -void RefSoftmaxFloat32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSoftmaxFloat32Workload_Execute"); - - Softmax(GetInputTensorDataFloat(0, m_Data), - GetOutputTensorDataFloat(0, m_Data), - GetTensorInfo(m_Data.m_Inputs[0]), - m_Data.m_Parameters.m_Beta); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefSoftmaxFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefSoftmaxFloat32Workload.hpp deleted file mode 100644 index d37f2b5990..0000000000 --- a/src/armnn/backends/RefWorkloads/RefSoftmaxFloat32Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefSoftmaxFloat32Workload : public Float32Workload<SoftmaxQueueDescriptor> -{ -public: - using Float32Workload<SoftmaxQueueDescriptor>::Float32Workload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefSoftmaxUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefSoftmaxUint8Workload.cpp deleted file mode 100644 index 17114ec83a..0000000000 --- a/src/armnn/backends/RefWorkloads/RefSoftmaxUint8Workload.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "RefSoftmaxUint8Workload.hpp" - -#include "RefWorkloadUtils.hpp" -#include "Softmax.hpp" - -#include "Profiling.hpp" - -#include <vector> - -namespace armnn -{ - -void RefSoftmaxUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSoftmaxUint8Workload_Execute"); - - const TensorInfo& tensorInfo = GetTensorInfo(m_Data.m_Inputs[0]); - - auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), tensorInfo); - - std::vector<float> results(tensorInfo.GetNumElements()); - - Softmax(dequant.data(), - results.data(), - tensorInfo, - m_Data.m_Parameters.m_Beta); - - Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), GetTensorInfo(m_Data.m_Outputs[0])); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefSoftmaxUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefSoftmaxUint8Workload.hpp deleted file mode 100644 index b179d529da..0000000000 --- a/src/armnn/backends/RefWorkloads/RefSoftmaxUint8Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefSoftmaxUint8Workload : public Uint8Workload<SoftmaxQueueDescriptor> -{ -public: - using Uint8Workload<SoftmaxQueueDescriptor>::Uint8Workload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefSplitterFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefSplitterFloat32Workload.cpp deleted file mode 100644 index 75611dacf3..0000000000 --- a/src/armnn/backends/RefWorkloads/RefSplitterFloat32Workload.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefSplitterFloat32Workload.hpp" - -#include "Splitter.hpp" - -#include "Profiling.hpp" - -namespace armnn -{ - -void RefSplitterFloat32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSplitterFloat32Workload_Execute"); - Splitter<float>(m_Data); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefSplitterFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefSplitterFloat32Workload.hpp deleted file mode 100644 index 12176dd277..0000000000 --- a/src/armnn/backends/RefWorkloads/RefSplitterFloat32Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefSplitterFloat32Workload : public Float32Workload<SplitterQueueDescriptor> -{ -public: - using Float32Workload<SplitterQueueDescriptor>::Float32Workload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefSplitterUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefSplitterUint8Workload.cpp deleted file mode 100644 index ca9f5db850..0000000000 --- a/src/armnn/backends/RefWorkloads/RefSplitterUint8Workload.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "RefSplitterUint8Workload.hpp" - -#include "Splitter.hpp" - -#include "Profiling.hpp" - -namespace armnn -{ - -void RefSplitterUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSplitterUint8Workload_Execute"); - Splitter<uint8_t>(m_Data); -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefSplitterUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefSplitterUint8Workload.hpp deleted file mode 100644 index e80cb1a654..0000000000 --- a/src/armnn/backends/RefWorkloads/RefSplitterUint8Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -namespace armnn -{ - -class RefSplitterUint8Workload : public Uint8Workload<SplitterQueueDescriptor> -{ -public: - using Uint8Workload<SplitterQueueDescriptor>::Uint8Workload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefWorkloadUtils.hpp b/src/armnn/backends/RefWorkloads/RefWorkloadUtils.hpp deleted file mode 100644 index 616a875028..0000000000 --- a/src/armnn/backends/RefWorkloads/RefWorkloadUtils.hpp +++ /dev/null @@ -1,138 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/CpuTensorHandle.hpp" - -#include <armnn/Tensor.hpp> -#include <armnn/Types.hpp> -#include <Half.hpp> - -#include <boost/polymorphic_cast.hpp> - -namespace armnn -{ - -//////////////////////////////////////////// -/// float32 helpers -//////////////////////////////////////////// - -inline const TensorInfo& GetTensorInfo(const ITensorHandle* tensorHandle) -{ - // We know that reference workloads use CpuTensorHandles only, so this cast is legitimate. - const ConstCpuTensorHandle* cpuTensorHandle = - boost::polymorphic_downcast<const ConstCpuTensorHandle*>(tensorHandle); - return cpuTensorHandle->GetTensorInfo(); -} - -template <typename DataType> -inline const DataType* GetConstCpuData(const ITensorHandle* tensorHandle) -{ - // We know that reference workloads use (Const)CpuTensorHandles only, so this cast is legitimate. - const ConstCpuTensorHandle* cpuTensorHandle = - boost::polymorphic_downcast<const ConstCpuTensorHandle*>(tensorHandle); - return cpuTensorHandle->GetConstTensor<DataType>(); -} - -template <typename DataType> -inline DataType* GetCpuData(const ITensorHandle* tensorHandle) -{ - // We know that reference workloads use CpuTensorHandles only, so this cast is legitimate. 
- const CpuTensorHandle* cpuTensorHandle = boost::polymorphic_downcast<const CpuTensorHandle*>(tensorHandle); - return cpuTensorHandle->GetTensor<DataType>(); -}; - -template <typename DataType, typename PayloadType> -const DataType* GetInputTensorData(unsigned int idx, const PayloadType& data) -{ - const ITensorHandle* tensorHandle = data.m_Inputs[idx]; - return GetConstCpuData<DataType>(tensorHandle); -} - -template <typename DataType, typename PayloadType> -DataType* GetOutputTensorData(unsigned int idx, const PayloadType& data) -{ - const ITensorHandle* tensorHandle = data.m_Outputs[idx]; - return GetCpuData<DataType>(tensorHandle); -} - -template <typename PayloadType> -const float* GetInputTensorDataFloat(unsigned int idx, const PayloadType& data) -{ - return GetInputTensorData<float>(idx, data); -} - -template <typename PayloadType> -float* GetOutputTensorDataFloat(unsigned int idx, const PayloadType& data) -{ - return GetOutputTensorData<float>(idx, data); -} - -template <typename PayloadType> -const Half* GetInputTensorDataHalf(unsigned int idx, const PayloadType& data) -{ - return GetInputTensorData<Half>(idx, data); -} - -template <typename PayloadType> -Half* GetOutputTensorDataHalf(unsigned int idx, const PayloadType& data) -{ - return GetOutputTensorData<Half>(idx, data); -} - -//////////////////////////////////////////// -/// u8 helpers -//////////////////////////////////////////// - -inline const uint8_t* GetConstCpuU8Data(const ITensorHandle* tensorHandle) -{ - // We know that reference workloads use (Const)CpuTensorHandles only, so this cast is legitimate. - const ConstCpuTensorHandle* cpuTensorHandle = - boost::polymorphic_downcast<const ConstCpuTensorHandle*>(tensorHandle); - return cpuTensorHandle->GetConstTensor<uint8_t>(); -}; - -inline uint8_t* GetCpuU8Data(const ITensorHandle* tensorHandle) -{ - // We know that reference workloads use CpuTensorHandles only, so this cast is legitimate. - const CpuTensorHandle* cpuTensorHandle = boost::polymorphic_downcast<const CpuTensorHandle*>(tensorHandle); - return cpuTensorHandle->GetTensor<uint8_t>(); -}; - -template <typename PayloadType> -const uint8_t* GetInputTensorDataU8(unsigned int idx, const PayloadType& data) -{ - const ITensorHandle* tensorHandle = data.m_Inputs[idx]; - return GetConstCpuU8Data(tensorHandle); -} - -template <typename PayloadType> -uint8_t* GetOutputTensorDataU8(unsigned int idx, const PayloadType& data) -{ - const ITensorHandle* tensorHandle = data.m_Outputs[idx]; - return GetCpuU8Data(tensorHandle); -} - -template<typename T> -std::vector<float> Dequantize(const T* quant, const TensorInfo& info) -{ - std::vector<float> ret(info.GetNumElements()); - for (size_t i = 0; i < info.GetNumElements(); i++) - { - ret[i] = armnn::Dequantize(quant[i], info.GetQuantizationScale(), info.GetQuantizationOffset()); - } - return ret; -} - -inline void Quantize(uint8_t* quant, const float* dequant, const TensorInfo& info) -{ - for (size_t i = 0; i < info.GetNumElements(); i++) - { - quant[i] = armnn::Quantize<uint8_t>(dequant[i], info.GetQuantizationScale(), info.GetQuantizationOffset()); - } -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/ResizeBilinear.cpp b/src/armnn/backends/RefWorkloads/ResizeBilinear.cpp deleted file mode 100644 index 0bce3c7ed8..0000000000 --- a/src/armnn/backends/RefWorkloads/ResizeBilinear.cpp +++ /dev/null @@ -1,92 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ResizeBilinear.hpp" - -#include "TensorBufferArrayView.hpp" - -#include <boost/numeric/conversion/cast.hpp> - -#include <cmath> -#include <algorithm> - -namespace armnn -{ - -namespace -{ - -inline float Lerp(float a, float b, float w) -{ - return w * b + (1.f - w) * a; -} - -} - -void ResizeBilinear(const float* in, const TensorInfo& inputInfo, float* out, const TensorInfo& outputInfo) -{ - // We follow the definition of TensorFlow and AndroidNN: the top-left corner of a texel in the output - // image is projected into the input image to figure out the interpolants and weights. Note that this - // will yield different results than if projecting the centre of output texels. - - const unsigned int batchSize = inputInfo.GetShape()[0]; - const unsigned int channelCount = inputInfo.GetShape()[1]; - - const unsigned int inputHeight = inputInfo.GetShape()[2]; - const unsigned int inputWidth = inputInfo.GetShape()[3]; - const unsigned int outputHeight = outputInfo.GetShape()[2]; - const unsigned int outputWidth = outputInfo.GetShape()[3]; - - // How much to scale pixel coordinates in the output image, to get the corresponding pixel coordinates - // in the input image. - const float scaleY = boost::numeric_cast<float>(inputHeight) / boost::numeric_cast<float>(outputHeight); - const float scaleX = boost::numeric_cast<float>(inputWidth) / boost::numeric_cast<float>(outputWidth); - - TensorBufferArrayView<const float> input(inputInfo.GetShape(), in); - TensorBufferArrayView<float> output(outputInfo.GetShape(), out); - - for (unsigned int n = 0; n < batchSize; ++n) - { - for (unsigned int c = 0; c < channelCount; ++c) - { - for (unsigned int y = 0; y < outputHeight; ++y) - { - // Corresponding real-valued height coordinate in input image. - const float iy = boost::numeric_cast<float>(y) * scaleY; - - // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation). - const float fiy = floorf(iy); - const unsigned int y0 = boost::numeric_cast<unsigned int>(fiy); - - // Interpolation weight (range [0,1]). - const float yw = iy - fiy; - - for (unsigned int x = 0; x < outputWidth; ++x) - { - // Real-valued and discrete width coordinates in input image. - const float ix = boost::numeric_cast<float>(x) * scaleX; - const float fix = floorf(ix); - const unsigned int x0 = boost::numeric_cast<unsigned int>(fix); - - // Interpolation weight (range [0,1]). - const float xw = ix - fix; - - // Discrete width/height coordinates of texels below and to the right of (x0, y0). - const unsigned int x1 = std::min(x0 + 1, inputWidth - 1u); - const unsigned int y1 = std::min(y0 + 1, inputHeight - 1u); - - // Interpolation - const float ly0 = Lerp(input.Get(n, c, y0, x0), input.Get(n, c, y0, x1), xw); // lerp along row y0. - const float ly1 = Lerp(input.Get(n, c, y1, x0), input.Get(n, c, y1, x1), xw); // lerp along row y1. - const float l = Lerp(ly0, ly1, yw); - - output.Get(n, c, y, x) = l; - } - } - } - } -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/ResizeBilinear.hpp b/src/armnn/backends/RefWorkloads/ResizeBilinear.hpp deleted file mode 100644 index 847b8e8bef..0000000000 --- a/src/armnn/backends/RefWorkloads/ResizeBilinear.hpp +++ /dev/null @@ -1,15 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <armnn/Tensor.hpp> - -namespace armnn -{ - -void ResizeBilinear(const float* in, const TensorInfo& inputInfo, float* out, const TensorInfo& outputInfo); - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Softmax.cpp b/src/armnn/backends/RefWorkloads/Softmax.cpp deleted file mode 100644 index 4f1016e86c..0000000000 --- a/src/armnn/backends/RefWorkloads/Softmax.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "Softmax.hpp" - -#include <cmath> -#include <vector> - -namespace armnn -{ - -/// Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo. -void Softmax(const float* in, float* out, const TensorInfo& tensorInfo, float beta) -{ - unsigned int numChannels = tensorInfo.GetShape()[1]; - for (unsigned int n = 0; n < tensorInfo.GetShape()[0]; n++) - { - // Find maximum channel. - float max = in[n * numChannels]; - for (unsigned int c = 1; c < numChannels; c++) - { - float val = in[n * numChannels + c]; - if (val > max) - { - max = val; - } - } - - // Exponentiate all values and sum. - std::vector<float> exponentials(numChannels); - float sum = 0.0f; - for (unsigned int c = 0; c < numChannels; c++) - { - float val = in[n * numChannels + c]; - exponentials[c] = expf((val - max) * beta); - sum += exponentials[c]; - } - - // Divide exponentials by sum to give outputs. - for (unsigned int c = 0; c < numChannels; c++) - { - out[n * numChannels + c] = exponentials[c] / sum; - } - } -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Softmax.hpp b/src/armnn/backends/RefWorkloads/Softmax.hpp deleted file mode 100644 index 3b974f9e9e..0000000000 --- a/src/armnn/backends/RefWorkloads/Softmax.hpp +++ /dev/null @@ -1,16 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <armnn/Tensor.hpp> - -namespace armnn -{ - -/// Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo. -void Softmax(const float* in, float* out, const TensorInfo& tensorInfo, float beta); - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Splitter.hpp b/src/armnn/backends/RefWorkloads/Splitter.hpp deleted file mode 100644 index e9c0379c9e..0000000000 --- a/src/armnn/backends/RefWorkloads/Splitter.hpp +++ /dev/null @@ -1,84 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "RefWorkloadUtils.hpp" - -#include "backends/WorkloadData.hpp" - -#include <armnn/Tensor.hpp> - -#include <boost/assert.hpp> - -namespace armnn -{ - -template <typename DataType> -void Splitter(const SplitterQueueDescriptor& data) -{ - const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]); - - for (unsigned int index = 0; index < inputInfo0.GetNumElements(); ++index) - { - unsigned int indices[MaxNumOfTensorDimensions] = { 0 }; - - unsigned int indexRemainder = index; - unsigned int dimensionStride = inputInfo0.GetNumElements(); - - for (unsigned int i = 0; i<inputInfo0.GetNumDimensions(); i++) - { - dimensionStride /= inputInfo0.GetShape()[i]; - indices[i] = indexRemainder / dimensionStride; // Use integer division to round down. 
- indexRemainder -= indices[i] * dimensionStride; - } - - for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx) - { - SplitterQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx]; - - //Split view extents are defined by the size of (the corresponding) input tensor. - const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[viewIdx]); - BOOST_ASSERT(outputInfo.GetNumDimensions() == inputInfo0.GetNumDimensions()); - - // Check all dimensions to see if this element is inside the given input view. - bool insideView = true; - for (unsigned int i = 0; i<outputInfo.GetNumDimensions(); i++) - { - if (indices[i] < view.m_Origin[i]) - { - insideView = false; - } - if (indices[i] >= view.m_Origin[i] + outputInfo.GetShape()[i]) - { - insideView = false; - } - } - - if (insideView) - { - unsigned int outIndex = 0; - unsigned int dimensionStride = 1; - - for (unsigned int i = outputInfo.GetNumDimensions(); i-- > 0;) - { - outIndex += dimensionStride * (indices[i] - view.m_Origin[i]); - dimensionStride *= outputInfo.GetShape()[i]; - } - - //We are within the view, to copy input data to the output corresponding to this view. - DataType* outputData = GetOutputTensorData<DataType>(viewIdx, data); - BOOST_ASSERT(outputData); - - const DataType* inputData = GetInputTensorData<DataType>(0, data); - BOOST_ASSERT(inputData); - - outputData[outIndex] = inputData[index]; - } - } - } -} - -} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/TensorBufferArrayView.hpp b/src/armnn/backends/RefWorkloads/TensorBufferArrayView.hpp deleted file mode 100644 index e19810ca87..0000000000 --- a/src/armnn/backends/RefWorkloads/TensorBufferArrayView.hpp +++ /dev/null @@ -1,42 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include <armnn/Tensor.hpp> - -#include <boost/assert.hpp> - -namespace armnn -{ - -// Utility class providing access to raw tensor memory based on indices along each dimension. -template <typename DataType> -class TensorBufferArrayView -{ -public: - TensorBufferArrayView(const TensorShape& shape, DataType* data) - : m_Shape(shape) - , m_Data(data) - { - } - - DataType& Get(unsigned int b, unsigned int c, unsigned int h, unsigned int w) const - { - BOOST_ASSERT( b < m_Shape[0] || (m_Shape[0] == 0 && b == 0) ); - BOOST_ASSERT( c < m_Shape[1] || (m_Shape[1] == 0 && c == 0) ); - BOOST_ASSERT( h < m_Shape[2] || (m_Shape[2] == 0 && h == 0) ); - BOOST_ASSERT( w < m_Shape[3] || (m_Shape[3] == 0 && w == 0) ); - - return m_Data[b * m_Shape[1] * m_Shape[2] * m_Shape[3] - + c * m_Shape[2] * m_Shape[3] - + h * m_Shape[3] - + w]; - } - -private: - const TensorShape m_Shape; - DataType* m_Data; -}; - -} //namespace armnn diff --git a/src/armnn/backends/StringMapping.cpp b/src/armnn/backends/StringMapping.cpp deleted file mode 100644 index 3ca8843812..0000000000 --- a/src/armnn/backends/StringMapping.cpp +++ /dev/null @@ -1,17 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "StringMapping.hpp" - -namespace armnn -{ - -const StringMapping& StringMapping::Instance() -{ - static StringMapping instance; - return instance; -} - -} // armnn diff --git a/src/armnn/backends/StringMapping.hpp b/src/armnn/backends/StringMapping.hpp deleted file mode 100644 index 6312e68945..0000000000 --- a/src/armnn/backends/StringMapping.hpp +++ /dev/null @@ -1,49 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -namespace armnn -{ - -/// -/// StringMapping is helper class to be able to use strings as template -/// parameters, so this allows simplifying code which only differs in -/// a string, such as a debug string literal. -/// -struct StringMapping -{ -public: - enum Id { - RefAdditionWorkload_Execute, - RefSubtractionWorkload_Execute, - RefMultiplicationWorkload_Execute, - RefDivisionWorkload_Execute, - MAX_STRING_ID - }; - - const char * Get(Id id) const - { - return m_Strings[id]; - } - - static const StringMapping& Instance(); - -private: - StringMapping() - { - m_Strings[RefAdditionWorkload_Execute] = "RefAdditionWorkload_Execute"; - m_Strings[RefSubtractionWorkload_Execute] = "RefSubtractionWorkload_Execute"; - m_Strings[RefMultiplicationWorkload_Execute] = "RefMultiplicationWorkload_Execute"; - m_Strings[RefDivisionWorkload_Execute] = "RefDivisionWorkload_Execute"; - } - - StringMapping(const StringMapping &) = delete; - StringMapping& operator=(const StringMapping &) = delete; - - const char * m_Strings[MAX_STRING_ID]; -}; - -} //namespace armnn
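The StringMapping singleton above exists so that several reference workloads which differ only in their debug/profiling string can share a single templated implementation. Below is a minimal, hypothetical sketch of how such a mapping is typically consumed; the ElementwiseWorkloadSketch name and the surrounding main() are illustrative assumptions rather than code from this repository, and the sketch assumes the StringMapping.hpp shown above is available.

#include <iostream>
#include "StringMapping.hpp" // the (deleted) header shown above

// Hypothetical: a workload templated on a StringMapping::Id, so otherwise identical
// elementwise workloads each report a distinct name.
template <armnn::StringMapping::Id DebugString>
struct ElementwiseWorkloadSketch
{
    void Execute() const
    {
        // Resolve the compile-time Id to its human-readable string via the singleton.
        std::cout << armnn::StringMapping::Instance().Get(DebugString) << std::endl;
        // ... the per-element arithmetic would run here ...
    }
};

int main()
{
    ElementwiseWorkloadSketch<armnn::StringMapping::RefAdditionWorkload_Execute> addition;
    addition.Execute(); // prints "RefAdditionWorkload_Execute"
    return 0;
}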
\ No newline at end of file diff --git a/src/armnn/backends/Workload.hpp b/src/armnn/backends/Workload.hpp deleted file mode 100644 index cf9c6f21e5..0000000000 --- a/src/armnn/backends/Workload.hpp +++ /dev/null @@ -1,147 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include "WorkloadData.hpp" -#include "WorkloadInfo.hpp" -#include <algorithm> -#include "Profiling.hpp" - -namespace armnn -{ - -// Workload interface to enqueue a layer computation. -class IWorkload -{ -public: - virtual ~IWorkload() {} - - virtual void Execute() const = 0; -}; - -// NullWorkload used to denote an unsupported workload when used by the MakeWorkload<> template -// in the various workload factories. -// There should never be an instantiation of a NullWorkload. -class NullWorkload : public IWorkload -{ - NullWorkload()=delete; -}; - -template <typename QueueDescriptor> -class BaseWorkload : public IWorkload -{ -public: - - BaseWorkload(const QueueDescriptor& descriptor, const WorkloadInfo& info) - : m_Data(descriptor) - { - m_Data.Validate(info); - } - - const QueueDescriptor& GetData() const { return m_Data; } - -protected: - const QueueDescriptor m_Data; -}; - -// TypedWorkload used -template <typename QueueDescriptor, armnn::DataType... DataTypes> -class TypedWorkload : public BaseWorkload<QueueDescriptor> -{ -public: - - TypedWorkload(const QueueDescriptor& descriptor, const WorkloadInfo& info) - : BaseWorkload<QueueDescriptor>(descriptor, info) - { - std::vector<armnn::DataType> dataTypes = {DataTypes...}; - armnn::DataType expectedInputType; - - if (!info.m_InputTensorInfos.empty()) - { - expectedInputType = info.m_InputTensorInfos.front().GetDataType(); - - if (std::find(dataTypes.begin(), dataTypes.end(), expectedInputType) == dataTypes.end()) - { - BOOST_ASSERT_MSG(false, "Trying to create workload with incorrect type"); - } - BOOST_ASSERT_MSG(std::all_of(std::next(info.m_InputTensorInfos.begin()), - info.m_InputTensorInfos.end(), - [&](auto it){ - return it.GetDataType() == expectedInputType; - }), - "Trying to create workload with incorrect type"); - } - armnn::DataType expectedOutputType; - - if (!info.m_OutputTensorInfos.empty()) - { - expectedOutputType = info.m_OutputTensorInfos.front().GetDataType(); - - if (!info.m_InputTensorInfos.empty()) - { - if (expectedOutputType != expectedInputType) - { - BOOST_ASSERT_MSG(false, "Trying to create workload with incorrect type"); - } - } - else if (std::find(dataTypes.begin(), dataTypes.end(), expectedOutputType) == dataTypes.end()) - { - BOOST_ASSERT_MSG(false, "Trying to create workload with incorrect type"); - } - BOOST_ASSERT_MSG(std::all_of(std::next(info.m_OutputTensorInfos.begin()), - info.m_OutputTensorInfos.end(), - [&](auto it){ - return it.GetDataType() == expectedOutputType; - }), - "Trying to create workload with incorrect type"); - } - } -}; - -template <typename QueueDescriptor, armnn::DataType InputDataType, armnn::DataType OutputDataType> -class MultiTypedWorkload : public BaseWorkload<QueueDescriptor> -{ -public: - - MultiTypedWorkload(const QueueDescriptor& descriptor, const WorkloadInfo& info) - : BaseWorkload<QueueDescriptor>(descriptor, info) - { - BOOST_ASSERT_MSG(std::all_of(info.m_InputTensorInfos.begin(), - info.m_InputTensorInfos.end(), - [&](auto it){ - return it.GetDataType() == InputDataType; - }), - "Trying to create workload with incorrect type"); - BOOST_ASSERT_MSG(std::all_of(info.m_OutputTensorInfos.begin(), - info.m_OutputTensorInfos.end(), - 
[&](auto it){ - return it.GetDataType() == OutputDataType; - }), - "Trying to create workload with incorrect type"); - } -}; - -template <typename QueueDescriptor> -using FloatWorkload = TypedWorkload<QueueDescriptor, - armnn::DataType::Float16, - armnn::DataType::Float32>; - -template <typename QueueDescriptor> -using Float32Workload = TypedWorkload<QueueDescriptor, armnn::DataType::Float32>; - -template <typename QueueDescriptor> -using Uint8Workload = TypedWorkload<QueueDescriptor, armnn::DataType::QuantisedAsymm8>; - -template <typename QueueDescriptor> -using Float16ToFloat32Workload = MultiTypedWorkload<QueueDescriptor, - armnn::DataType::Float16, - armnn::DataType::Float32>; - -template <typename QueueDescriptor> -using Float32ToFloat16Workload = MultiTypedWorkload<QueueDescriptor, - armnn::DataType::Float32, - armnn::DataType::Float16>; - -} //namespace armnn diff --git a/src/armnn/backends/WorkloadData.cpp b/src/armnn/backends/WorkloadData.cpp deleted file mode 100644 index c5c607d954..0000000000 --- a/src/armnn/backends/WorkloadData.cpp +++ /dev/null @@ -1,871 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#include "WorkloadData.hpp" - -#include "CpuTensorHandle.hpp" -#include "WorkloadInfo.hpp" - -#include <algorithm> -#include <string> -#include <sstream> -#include <iomanip> - -#include <boost/format.hpp> - -namespace armnn -{ - -//--------------------------------------------------------------- -DataType GetBiasDataType(DataType inputDataType) -{ - switch (inputDataType) - { - case DataType::Float16: - return DataType::Float16; - case DataType::Float32: - return DataType::Float32; - case DataType::QuantisedAsymm8: - return DataType::Signed32; - default: - BOOST_ASSERT_MSG(false, "Invalid input data type"); - return DataType::Float32; - } -} - -namespace -{ - -//--------------------------------------------------------------- -//android ndk does not support std::to_string function. -template <typename T> -std::string to_string(T value) -{ - std::ostringstream os; - os << value; - return os.str(); -} - -//--------------------------------------------------------------- -void ValidatePointer(const void* ptr, std::string const& descName, std::string const& paramName) -{ - if (!ptr) - { - throw InvalidArgumentException(descName + ": Invalid null pointer. The " + - paramName + " parameter must be set."); - } -} - -//--------------------------------------------------------------- -void ValidateTensorShapesMatch(const TensorInfo& first, - const TensorInfo& second, - std::string const& descName, - std::string const& firstName, - std::string const& secondName) -{ - if (first.GetShape() != second.GetShape()) - { - throw InvalidArgumentException(descName + ": " - + firstName + " & " + secondName + " must have identical shapes"); - } -} - -//--------------------------------------------------------------- -void ValidateNoInputs(const WorkloadInfo& workloadInfo, std::string const& descName) -{ - if (workloadInfo.m_InputTensorInfos.size() != 0) - { - throw InvalidArgumentException(descName + - ": Requires no inputs. " + - to_string(workloadInfo.m_InputTensorInfos.size()) + " has been provided."); - } -} - -//--------------------------------------------------------------- -void ValidateSingleInput(const WorkloadInfo& workloadInfo, std::string const& descName) -{ - if (workloadInfo.m_InputTensorInfos.size() != 1) - { - throw InvalidArgumentException(descName + - ": Requires exactly one input. 
" + - to_string(workloadInfo.m_InputTensorInfos.size()) + " has been provided." ); - } -} - -//--------------------------------------------------------------- -void ValidateTwoInputs(const WorkloadInfo& workloadInfo, std::string const& descName) -{ - if (workloadInfo.m_InputTensorInfos.size() != 2) - { - throw InvalidArgumentException(descName + - ": Requires exactly two workloadInfo.m_InputTensorInfos. " + - to_string(workloadInfo.m_InputTensorInfos.size()) + " have been provided."); - } -} - -//--------------------------------------------------------------- -void ValidateSingleOutput(const WorkloadInfo& workloadInfo, std::string const& descName) -{ - if (workloadInfo.m_OutputTensorInfos.size() != 1) - { - throw InvalidArgumentException(descName + - ": Requires exactly one output. " + - to_string(workloadInfo.m_OutputTensorInfos.size()) + " has been provided."); - } -} - -//--------------------------------------------------------------- -void ValidateTensorNumDimensions(const TensorInfo& tensor, - std::string const& descName, - unsigned int numDimensions, - std::string const& tensorName) -{ - if (tensor.GetNumDimensions() != numDimensions) - { - throw InvalidArgumentException(descName + ": Expected " + to_string(numDimensions) + " but got " + - to_string(tensor.GetNumDimensions()) + " dimensions for " + - tensorName + " tensor."); - } -} - -//--------------------------------------------------------------- -void ValidateTensorDataType(const TensorInfo& tensor, DataType dataType, - const std::string& descName, std::string const& tensorName) -{ - if (tensor.GetDataType() != dataType) - { - throw InvalidArgumentException(descName + ": Expected data type " + GetDataTypeName(dataType) + " but got " + - GetDataTypeName(tensor.GetDataType()) + " for " + tensorName + " tensor."); - } -} - -//--------------------------------------------------------------- -void ValidateBiasTensorQuantization(const TensorInfo& biasTensor, const TensorInfo& inputTensorInfo, - const TensorInfo& weightsTensorInfo, const std::string& descName) -{ - if (biasTensor.GetQuantizationOffset() != 0) - { - throw InvalidArgumentException(descName + ": Expected zero quantization offset for bias tensor but got " + - to_string(biasTensor.GetQuantizationOffset())); - } - const float expectedScale = inputTensorInfo.GetQuantizationScale() * weightsTensorInfo.GetQuantizationScale(); - if (std::abs(biasTensor.GetQuantizationScale() - expectedScale) > 0.000000001f) - { - // Print the float values with extra precision to see very small differences - std::stringstream msg; - msg << std::setprecision(10) << descName << ": Expected " << expectedScale << - " quantization scale for bias tensor (the product of the input and weight scales), but got " << - biasTensor.GetQuantizationScale(); - throw InvalidArgumentException(msg.str()); - } -} - -//--------------------------------------------------------------- -void ValidateTensors(const std::vector<ITensorHandle*>& vec, - unsigned int numExpected, - const std::string& descName, - const std::string& varName) -{ - if (vec.empty() && numExpected > 0) - { - throw InvalidArgumentException(descName + ": Invalid empty " + varName + " array."); - } - - for (unsigned int i = 0; i < numExpected; ++i) - { - if (!vec[i]) - { - throw InvalidArgumentException(descName + ": Invalid NULL for " + varName + to_string(i)); - } - } -} - -//--------------------------------------------------------------- -void ValidateBroadcastTensorShapesMatch(const TensorInfo& first, - const TensorInfo& second, - const TensorInfo& 
output, - std::string const& descName, - std::string const& firstName, - std::string const& secondName) -{ - // Tensors must have the same number of dimensions in order to be explicit about which dimensions will get - // broadcasted. - if (first.GetNumDimensions() != second.GetNumDimensions()) - { - throw InvalidArgumentException(descName + ": Tensors " - + firstName + " & " + secondName - + " must have the same number of dimensions in order to be broadcasted"); - } - uint32_t numDims = first.GetNumDimensions(); - std::vector<uint32_t> outputDims(numDims, 0u); - for (uint32_t i = 0; i < numDims; i++) - { - const bool dimsNotEqual = first.GetShape()[i] != second.GetShape()[i]; - const bool dimsNotOne = (first.GetShape()[i] != 1) && (second.GetShape()[i] != 1); - if (dimsNotEqual && dimsNotOne) - { - throw InvalidArgumentException("Broadcasting is not possible for incompatible shapes"); - } - outputDims[i] = std::max(first.GetShape()[i], second.GetShape()[i]); - } - TensorShape broadcastShape = TensorShape(boost::numeric_cast<unsigned int>(outputDims.size()), outputDims.data()); - if (broadcastShape != output.GetShape()) - { - throw InvalidArgumentException(descName + ": The tensor shape resulting from adding " - + firstName + " & " + secondName - + " does not match the output shape"); - } -} - -//--------------------------------------------------------------- -/// Validates that the output tensor's quantization scale is greater than the product -/// of the two input tensors' quantization scales. This is a requirement of the implementation of -/// the quantized multiplication. -void ValidateTensorQuantizationMultiplier(const TensorInfo& inputTensor1, const TensorInfo& inputTensor2, - const TensorInfo& outputTensorInfo, std::string const& descName, - const std::string& inputTensor1Name, const std::string& inputTensor2Name, const std::string& outputTensorName) -{ - if (outputTensorInfo.GetDataType() == DataType::QuantisedAsymm8) - { - if (outputTensorInfo.GetQuantizationScale() <= - inputTensor1.GetQuantizationScale() * inputTensor2.GetQuantizationScale()) - { - std::stringstream msg; - msg << descName << ": Quantization scale of " << outputTensorName << " is not greater than " << - "the product of the " << inputTensor1Name << " and " << inputTensor2Name << " tensors"; - throw InvalidArgumentException(msg.str()); - } - } -} - -} //namespace - -void QueueDescriptor::ValidateInputsOutputs(const std::string& descName, - unsigned int numExpectedIn, unsigned int numExpectedOut) const -{ - ValidateTensors(m_Inputs, numExpectedIn, descName, "input"); - ValidateTensors(m_Outputs, numExpectedOut, descName, "output"); -} - -//--------------------------------------------------------------- -void MemCopyQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateSingleInput(workloadInfo, "MemCopyQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "MemCopyQueueDescriptor"); - - if (workloadInfo.m_InputTensorInfos.size() != workloadInfo.m_OutputTensorInfos.size()) - { - throw InvalidArgumentException(boost::str( - boost::format("Number of input infos (%1%) does not match the number of output infos (%2%)") - % workloadInfo.m_InputTensorInfos.size() % workloadInfo.m_OutputTensorInfos.size())); - } - - for (std::size_t i = 0; i < workloadInfo.m_InputTensorInfos.size(); ++i) - { - if (workloadInfo.m_InputTensorInfos[i].GetNumElements() != - workloadInfo.m_OutputTensorInfos[i].GetNumElements()) - { - throw InvalidArgumentException(boost::str( - boost::format("Number of elements for tensor 
input and output %1% does not match") - % i )); - } - } - - if (m_Inputs.size() != m_Outputs.size()) - { - throw InvalidArgumentException(boost::str( - boost::format("Number of inputs (%1%) does not match the number of outputs (%2%)") - % m_Inputs.size() % m_Outputs.size())); - } - - for (unsigned int i = 0; i < m_Inputs.size(); ++i) - { - if (!m_Inputs[i]) - { - throw InvalidArgumentException(boost::str(boost::format("Invalid null input %1%") % i)); - } - - if (!m_Outputs[i]) - { - throw InvalidArgumentException(boost::str(boost::format("Invalid null output %1%") % i)); - } - } -} - -//--------------------------------------------------------------- -void ActivationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateSingleInput(workloadInfo, "ActivationQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "ActivationQueueDescriptor"); - ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], - workloadInfo.m_OutputTensorInfos[0], - "ActivationQueueDescriptor", - "input", - "output"); -} - -//--------------------------------------------------------------- -void SoftmaxQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateSingleInput(workloadInfo, "SoftmaxQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "SoftmaxQueueDescriptor"); - ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "SoftmaxQueueDescriptor", 2, "input"); - ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "SoftmaxQueueDescriptor", 2, "output"); - - ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], - workloadInfo.m_OutputTensorInfos[0], - "SoftmaxQueueDescriptor", - "input", - "output"); -} - -//--------------------------------------------------------------- -void SplitterQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateSingleInput(workloadInfo, "SplitterQueueDescriptor"); - - if (workloadInfo.m_OutputTensorInfos.size() <= 0) - { - throw InvalidArgumentException("SplitterQueueDescriptor: At least one output needs to be provided."); - } - - if (workloadInfo.m_OutputTensorInfos.size() != m_ViewOrigins.size()) - { - throw InvalidArgumentException( - "SplitterQueueDescriptor: Number of split windows " - "has to match number of workloadInfo.m_OutputTensorInfos. " - "Number of windows: " + - to_string(m_ViewOrigins.size()) + - ". Number of workloadInfo.m_OutputTensorInfos: " + to_string(workloadInfo.m_OutputTensorInfos.size())); - } - - //The dimensionality of all the windows has to match the dimensionality (not shape) of the input. - std::size_t inputDims = workloadInfo.m_InputTensorInfos[0].GetNumDimensions(); - for(unsigned int w = 0; w < m_ViewOrigins.size(); ++w ) - { - //Checks that the dimensionality of input is same as the split windows. - ViewOrigin const& e = m_ViewOrigins[w]; - if (e.m_Origin.size() != inputDims) - { - throw InvalidArgumentException("SplitterQueueDescriptor: Window origin have to " - "have the same dimensionality as the input tensor. 
" - "Window origin (index: " + - to_string(w) + ") has " + to_string(e.m_Origin.size()) + - " dimensions, the input " - "tensor has " + - to_string(inputDims) + " dimensions."); - } - for (unsigned int i = 0; i < e.m_Origin.size(); ++i) - { - if (e.m_Origin[i] + workloadInfo.m_OutputTensorInfos[w].GetShape()[i] > - workloadInfo.m_InputTensorInfos[0].GetShape()[i]) - { - throw InvalidArgumentException("SplitterQueueDescriptor: Window extent coordinates have to " - "be smaller or equal than the size of the input in that coord."); - } - } - } -} - -//--------------------------------------------------------------- -void MergerQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateSingleOutput(workloadInfo, "MergerQueueDescriptor"); - - if (m_Inputs.size() <= 0) - { - throw InvalidArgumentException("MergerQueueDescriptor: At least one input needs to be provided."); - } - if (m_Outputs.size() <= 0) - { - throw InvalidArgumentException("MergerQueueDescriptor: At least one output needs to be provided."); - } - - if (workloadInfo.m_InputTensorInfos.size() <= 0) - { - throw InvalidArgumentException("MergerQueueDescriptor: At least one TensorInfo input needs to be provided."); - } - if (workloadInfo.m_OutputTensorInfos.size() <= 0) - { - throw InvalidArgumentException("MergerQueueDescriptor: At least one TensorInfo output needs to be provided."); - } - - if (workloadInfo.m_InputTensorInfos.size() != m_ViewOrigins.size()) - { - throw InvalidArgumentException( - "MergerQueueDescriptor: Number of split windows " - "has to match number of workloadInfo.m_InputTensorInfos. " - "Number of windows: " + - to_string(m_ViewOrigins.size()) + - ". Number of workloadInfo.m_InputTensorInfos: " + to_string(workloadInfo.m_InputTensorInfos.size())); - } - - //The dimensionality of all the windows has to match the dimensionality (not shape) of the output. - std::size_t outputDims = workloadInfo.m_OutputTensorInfos[0].GetNumDimensions(); - for(unsigned int w = 0; w < m_ViewOrigins.size(); ++w ) - { - //Checks that the dimensionality of output is same as the split windows. - ViewOrigin const& e = m_ViewOrigins[w]; - if (e.m_Origin.size() != outputDims) - { - throw InvalidArgumentException("MergerQueueDescriptor: Window origin have to " - "have the same dimensionality as the output tensor. " - "Window origin (index: " + - to_string(w) + ") has " + to_string(e.m_Origin.size()) + - " dimensions, the output " - "tensor has " + - to_string(outputDims) + " dimensions."); - } - //Checks that the merge windows are within the output tensor. 
- for (unsigned int i = 0; i < e.m_Origin.size(); ++i) - { - if (e.m_Origin[i] + workloadInfo.m_InputTensorInfos[w].GetShape()[i] - > workloadInfo.m_OutputTensorInfos[0].GetShape()[i]) - { - throw InvalidArgumentException("MergerQueueDescriptor: Window extent coordinates have to " - "be smaller or equal than the size of the output in that coord."); - } - } - } -} - -//--------------------------------------------------------------- -void FullyConnectedQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateSingleInput(workloadInfo, "FullyConnectedQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "FullyConnectedQueueDescriptor"); - ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "FullyConnectedQueueDescriptor", 2, "output"); - - if (!(workloadInfo.m_InputTensorInfos[0].GetNumDimensions() == 2 || - workloadInfo.m_InputTensorInfos[0].GetNumDimensions() == 4)) - { - throw InvalidArgumentException("FullyConnectedQueueDescriptor: Input tensor must have 2 or 4 dimensions."); - } - - if (m_Weight == nullptr) - { - throw InvalidArgumentException("FullyConnectedQueueDescriptor: Weight tensor descriptor is missing."); - } - - ValidateTensorNumDimensions(m_Weight->GetTensorInfo(), "FullyConnectedQueueDescriptor", 2, "weight"); - - if (m_Parameters.m_BiasEnabled) - { - if (m_Bias == nullptr) - { - throw InvalidArgumentException("FullyConnectedQueueDescriptor: Bias is enabled but " - "bias value tensor descriptor is missing."); - } - - // Validates type and quantization values. - ValidateBiasTensorQuantization(m_Bias->GetTensorInfo(), - workloadInfo.m_InputTensorInfos[0], m_Weight->GetTensorInfo(), "FullyConnectedQueueDescriptor"); - - ValidateTensorDataType(m_Bias->GetTensorInfo(), - GetBiasDataType(workloadInfo.m_InputTensorInfos[0].GetDataType()), - "FullyConnectedQueueDescriptor", "bias"); - - ValidateTensorNumDimensions(m_Bias->GetTensorInfo(), "FullyConnectedQueueDescriptor", 1, "bias"); - } - - ValidateTensorQuantizationMultiplier(workloadInfo.m_InputTensorInfos[0], m_Weight->GetTensorInfo(), - workloadInfo.m_OutputTensorInfos[0], "FullyConnectedQueueDescriptor", "input", "weights", "output"); -} - -//--------------------------------------------------------------- -void NormalizationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateSingleInput(workloadInfo, "NormalizationQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "NormalizationQueueDescriptor"); - ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], - workloadInfo.m_OutputTensorInfos[0], - "NormalizationQueueDescriptor", - "input", - "output"); -} - -void AdditionQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateTwoInputs(workloadInfo, "AdditionQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "AdditionQueueDescriptor"); - - ValidateBroadcastTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], - workloadInfo.m_InputTensorInfos[1], - workloadInfo.m_OutputTensorInfos[0], - "AdditionQueueDescriptor", - "first input", - "second input"); - -} - -//--------------------------------------------------------------- -void MultiplicationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateTwoInputs(workloadInfo, "MultiplicationQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "MultiplicationQueueDescriptor"); - - ValidateBroadcastTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], - workloadInfo.m_InputTensorInfos[1], - workloadInfo.m_OutputTensorInfos[0], - "MultiplicationQueueDescriptor", - 
"first input", - "second input"); -} - -void BatchNormalizationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateSingleInput(workloadInfo, "BatchNormalizationQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "BatchNormalizationQueueDescriptor"); - ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], - workloadInfo.m_OutputTensorInfos[0], - "BatchNormalizationQueueDescriptor", - "input", - "output"); - ValidatePointer(m_Mean, "BatchNormalizationQueueDescriptor", "mean"); - ValidatePointer(m_Variance, "BatchNormalizationQueueDescriptor", "variance"); - ValidatePointer(m_Beta, "BatchNormalizationQueueDescriptor", "beta"); - ValidatePointer(m_Gamma, "BatchNormalizationQueueDescriptor", "gamma"); - - - ValidateTensorNumDimensions(m_Mean->GetTensorInfo(), "BatchNormalizationQueueDescriptor", 1, "mean"); - ValidateTensorNumDimensions(m_Variance->GetTensorInfo(), "BatchNormalizationQueueDescriptor", 1, "variance"); - ValidateTensorNumDimensions(m_Beta->GetTensorInfo(), "BatchNormalizationQueueDescriptor", 1, "beta"); - ValidateTensorNumDimensions(m_Gamma->GetTensorInfo(), "BatchNormalizationQueueDescriptor", 1, "gamma"); - - ValidateTensorShapesMatch( - m_Mean->GetTensorInfo(), m_Variance->GetTensorInfo(), "BatchNormalizationQueueDescriptor", "mean", "variance"); - ValidateTensorShapesMatch( - m_Mean->GetTensorInfo(), m_Beta->GetTensorInfo(), "BatchNormalizationQueueDescriptor", "mean", "beta"); - ValidateTensorShapesMatch( - m_Mean->GetTensorInfo(), m_Gamma->GetTensorInfo(), "BatchNormalizationQueueDescriptor", "mean", "gamma"); -} - -void Convolution2dQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateSingleInput(workloadInfo, "Convolution2dQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "Convolution2dQueueDescriptor"); - - ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "Convolution2dQueueDescriptor", 4, "input"); - ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "Convolution2dQueueDescriptor", 4, "output"); - - ValidatePointer(m_Weight, "Convolution2dQueueDescriptor", "weight"); - ValidateTensorNumDimensions(m_Weight->GetTensorInfo(), "Convolution2dQueueDescriptor", 4, "weight"); - ValidateTensorDataType(m_Weight->GetTensorInfo(), workloadInfo.m_InputTensorInfos[0].GetDataType(), - "Convolution2dQueueDescriptor", "weight"); - if (m_Parameters.m_BiasEnabled) - { - ValidateTensorNumDimensions(m_Bias->GetTensorInfo(), "Convolution2dQueueDescriptor", 1, "bias"); - ValidateTensorDataType(m_Bias->GetTensorInfo(), - GetBiasDataType(workloadInfo.m_InputTensorInfos[0].GetDataType()), - "Convolution2dQueueDescriptor", "bias"); - ValidateBiasTensorQuantization(m_Bias->GetTensorInfo(), - workloadInfo.m_InputTensorInfos[0], m_Weight->GetTensorInfo(), "Convolution2dQueueDescriptor"); - } - - ValidateTensorQuantizationMultiplier(workloadInfo.m_InputTensorInfos[0], m_Weight->GetTensorInfo(), - workloadInfo.m_OutputTensorInfos[0], "Convolution2dQueueDescriptor", "input", "weights", "output"); -} - -void DepthwiseConvolution2dQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateSingleInput(workloadInfo, "DepthwiseConvolution2dQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "DepthwiseConvolution2dQueueDescriptor"); - - ValidateTensorNumDimensions( - workloadInfo.m_InputTensorInfos[0], "DepthwiseConvolution2dQueueDescriptor", 4, "input"); - ValidateTensorNumDimensions( - workloadInfo.m_OutputTensorInfos[0], "DepthwiseConvolution2dQueueDescriptor", 4, "output"); - - 
ValidatePointer(m_Weight, "DepthwiseConvolution2dQueueDescriptor", "weight"); - ValidateTensorNumDimensions(m_Weight->GetTensorInfo(), "DepthwiseConvolution2dQueueDescriptor", 4, "weight"); - - //inputChannels * channelMultiplier should be equal to outputChannels. - const unsigned int numWeightChannelMultiplier = m_Weight->GetTensorInfo().GetShape()[0]; - const unsigned int numWeightInputChannels = m_Weight->GetTensorInfo().GetShape()[1]; - const unsigned int numWeightOutputChannels = workloadInfo.m_OutputTensorInfos[0].GetShape()[1]; - if (numWeightChannelMultiplier * numWeightInputChannels != numWeightOutputChannels) - { - throw InvalidArgumentException( - boost::str(boost::format("DepthwiseConvolution2dQueueDescriptor: output_channels (provided %1%) should be " - "equal to input_channels (provided %2%) multiplied by channel_multiplier " - "(provided %3%).") - % numWeightOutputChannels % numWeightInputChannels % numWeightChannelMultiplier)); - } - - if (m_Parameters.m_BiasEnabled) - { - ValidatePointer(m_Bias, "DepthwiseConvolution2dQueueDescriptor", "bias"); - ValidateTensorNumDimensions(m_Bias->GetTensorInfo(), "DepthwiseConvolution2dQueueDescriptor", 1, "bias"); - ValidateBiasTensorQuantization(m_Bias->GetTensorInfo(), - workloadInfo.m_InputTensorInfos[0], m_Weight->GetTensorInfo(), "DepthwiseConvolution2dQueueDescriptor"); - - ValidateTensorDataType(m_Bias->GetTensorInfo(), - GetBiasDataType(workloadInfo.m_InputTensorInfos[0].GetDataType()), - "DepthwiseConvolution2dQueueDescriptor", "bias"); - } - - ValidateTensorQuantizationMultiplier(workloadInfo.m_InputTensorInfos[0], m_Weight->GetTensorInfo(), - workloadInfo.m_OutputTensorInfos[0], "DepthwiseConvolution2dQueueDescriptor", "input", "weights", "output"); -} - -void PermuteQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateSingleInput(workloadInfo, "PermuteQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "PermuteQueueDescriptor"); - - const PermutationVector& mapping = m_Parameters.m_DimMappings; - - const TensorInfo& input = workloadInfo.m_InputTensorInfos[0]; - const TensorInfo& output = workloadInfo.m_OutputTensorInfos[0]; - - ValidateTensorNumDimensions(input, "PermuteQueueDescriptor", mapping.GetSize(), "input"); - ValidateTensorNumDimensions(output, "PermuteQueueDescriptor", mapping.GetSize(), "output"); - - for (unsigned int i = 0; i < mapping.GetSize(); ++i) - { - if (input.GetShape()[i] != output.GetShape()[mapping[i]]) - { - throw InvalidArgumentException("PermuteQueueDescriptor: src dimension " + to_string(i) + - " (=" + to_string(input.GetShape()[i]) + ") " + - "must match dst dimension " + to_string(mapping[i]) + - " (=" + to_string(output.GetShape()[mapping[i]]) + ")"); - } - } -} - -void Pooling2dQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateSingleInput(workloadInfo, "Pooling2dQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "Pooling2dQueueDescriptor"); - - ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "Pooling2dQueueDescriptor", 4, "input"); - ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "Pooling2dQueueDescriptor", 4, "output"); -} - -void ResizeBilinearQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateSingleInput(workloadInfo, "ResizeBilinearQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "ResizeBilinearQueueDescriptor"); - - ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "ResizeBilinearQueueDescriptor", 4, "input"); - 
ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "ResizeBilinearQueueDescriptor", 4, "output"); - - // Resizes bilinear only changes width and height: batch and channel count must match. - { - const unsigned int inputBatchSize = workloadInfo.m_InputTensorInfos[0].GetShape()[0]; - const unsigned int outputBatchSize = workloadInfo.m_OutputTensorInfos[0].GetShape()[0]; - if (inputBatchSize != outputBatchSize) - { - throw InvalidArgumentException( - boost::str(boost::format("ResizeBilinearQueueDescriptor: Input batch size (%1%) " - "does not match output batch size (%2%)") % inputBatchSize % outputBatchSize)); - } - } - - { - const unsigned int inputChannelCount = workloadInfo.m_InputTensorInfos[0].GetShape()[1]; - const unsigned int outputChannelCount = workloadInfo.m_OutputTensorInfos[0].GetShape()[1]; - if (inputChannelCount != outputChannelCount) - { - throw InvalidArgumentException( - boost::str(boost::format("ResizeBilinearQueueDescriptor: Input channel count (%1%) " - "does not match output channel count (%2%)") % inputChannelCount % outputChannelCount)); - } - } -} - -void FakeQuantizationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateSingleInput(workloadInfo, "FakeQuantizationQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "FakeQuantizationQueueDescriptor"); - - ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "FakeQuantizationQueueDescriptor", 2, "input"); - ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "FakeQuantizationQueueDescriptor", 2, "output"); - ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], - workloadInfo.m_OutputTensorInfos[0], - "FakeQuantizationQueueDescriptor", - "input", - "output"); - if (m_Parameters.m_Min > m_Parameters.m_Max) - { - throw InvalidArgumentException("FakeQuantizationQueueDescriptor: min cannot be greater than max"); - } - -} - -void L2NormalizationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateSingleInput(workloadInfo, "L2NormalizationQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "L2NormalizationQueueDescriptor"); - - ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "L2NormalizationQueueDescriptor", 4, "input"); - ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "L2NormalizationQueueDescriptor", 4, "output"); - ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], - workloadInfo.m_OutputTensorInfos[0], - "L2NormalizationQueueDescriptor", - "input", - "output"); -} - -void ConstantQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateNoInputs(workloadInfo, "ConstantQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "ConstantQueueDescriptor"); - - if (!m_LayerOutput) - { - throw InvalidArgumentException("ConstantQueueDescriptor: No const input specified"); - } - - ValidateTensorShapesMatch(m_LayerOutput->GetTensorInfo(), - workloadInfo.m_OutputTensorInfos[0], - "ConstantQueueDescriptor", - "constant", - "output"); -} - -void ReshapeQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateSingleInput(workloadInfo, "ReshapeQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "ReshapeQueueDescriptor"); - - if (workloadInfo.m_InputTensorInfos[0].GetNumElements() != workloadInfo.m_OutputTensorInfos[0].GetNumElements()) - { - throw InvalidArgumentException("ReshapeQueueDescriptor: Input tensor has " + - to_string(workloadInfo.m_InputTensorInfos[0].GetNumElements()) + " but output tensor has " + - 
to_string(workloadInfo.m_OutputTensorInfos[0].GetNumElements()) + " elements."); - } -} - -void FloorQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateSingleInput(workloadInfo, "FloorQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "FlootQueueDescriptor"); - - if (workloadInfo.m_InputTensorInfos[0] != workloadInfo.m_OutputTensorInfos[0]) - { - throw InvalidArgumentException("FloorQueueDescriptor: Input and output tensor infos do not match."); - } -} - -void LstmQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "LstmQueueDescriptor", 2, "input"); - ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "LstmQueueDescriptor", 2, "output"); -} - -void ConvertFp32ToFp16QueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateSingleInput(workloadInfo, "ConvertFp32ToFp16QueueDescriptor"); - ValidateSingleOutput(workloadInfo, "ConvertFp32ToFp16QueueDescriptor"); - - if (workloadInfo.m_InputTensorInfos[0].GetDataType() != DataType::Float32) - { - throw InvalidArgumentException("ConvertFp32ToFp16QueueDescriptor: Input tensor type must be Float32."); - } - - if (workloadInfo.m_OutputTensorInfos[0].GetDataType() != DataType::Float16) - { - throw InvalidArgumentException("ConvertFp32ToFp16QueueDescriptor: Output tensor type must be Float16."); - } - - ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], - workloadInfo.m_OutputTensorInfos[0], - "ConvertFp32ToFp16QueueDescriptor", - "input", - "output"); -} - -void ConvertFp16ToFp32QueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateSingleInput(workloadInfo, "ConvertFp16ToFp32QueueDescriptor"); - ValidateSingleOutput(workloadInfo, "ConvertFp16ToFp32QueueDescriptor"); - - if (workloadInfo.m_InputTensorInfos[0].GetDataType() != DataType::Float16) - { - throw InvalidArgumentException("ConvertFp16ToFp32QueueDescriptor: Input tensor type must be Float16."); - } - if (workloadInfo.m_OutputTensorInfos[0].GetDataType() != DataType::Float32) - { - throw InvalidArgumentException("ConvertFp16ToFp32QueueDescriptor: Output tensor type must be Float32."); - } - - ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], - workloadInfo.m_OutputTensorInfos[0], - "ConvertFp16ToFp32QueueDescriptor", - "input", - "output"); -} - -void DivisionQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateTwoInputs(workloadInfo, "DivisionQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "DivisionQueueDescriptor"); - - ValidateBroadcastTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], - workloadInfo.m_InputTensorInfos[1], - workloadInfo.m_OutputTensorInfos[0], - "DivisionQueueDescriptor", - "first input", - "second input"); -} - -void SubtractionQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateTwoInputs(workloadInfo, "SubtractionQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "SubtractionQueueDescriptor"); - - ValidateBroadcastTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], - workloadInfo.m_InputTensorInfos[1], - workloadInfo.m_OutputTensorInfos[0], - "SubtractionQueueDescriptor", - "first input", - "second input"); -} - -void MeanQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateSingleInput(workloadInfo, "MeanQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "MeanQueueDescriptor"); - - const TensorInfo& input = workloadInfo.m_InputTensorInfos[0]; - const TensorInfo& output = 
workloadInfo.m_OutputTensorInfos[0]; - - if (m_Parameters.m_KeepDims) - { - ValidateTensorNumDimensions(output, "MeanQueueDescriptor", input.GetNumDimensions(), "output"); - } - else if (m_Parameters.m_Axis.empty()) - { - ValidateTensorNumDimensions(output, "MeanQueueDescriptor", 1, "output"); - } - else - { - auto outputDim = input.GetNumDimensions() - boost::numeric_cast<unsigned int>(m_Parameters.m_Axis.size()); - ValidateTensorNumDimensions(output, - "MeanQueueDescriptor", - outputDim > 0 ? outputDim : 1, - "output"); - } -} - -void PadQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const -{ - ValidateSingleInput(workloadInfo, "PadQueueDescriptor"); - ValidateSingleOutput(workloadInfo, "PadQueueDescriptor"); - - const TensorInfo& input = workloadInfo.m_InputTensorInfos[0]; - const TensorInfo& output = workloadInfo.m_OutputTensorInfos[1]; - // input and output should have the same number of dimensions - ValidateTensorNumDimensions(output, "PadQueueDescriptor", input.GetNumDimensions(), "output"); - // there should be entry in the pad list for each dimension in the input tensor - if (m_Parameters.m_PadList.size() != input.GetNumDimensions()) { - throw InvalidArgumentException("Pad List should contain the same number of entries as there" - " are dimensions in the input tensor that is " + - to_string(input.GetNumDimensions()) + " entries " + - " not " + to_string(m_Parameters.m_PadList.size()) + " entries."); - } -} - -} //namespace armnn diff --git a/src/armnn/backends/WorkloadData.hpp b/src/armnn/backends/WorkloadData.hpp deleted file mode 100644 index b5b0402237..0000000000 --- a/src/armnn/backends/WorkloadData.hpp +++ /dev/null @@ -1,330 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include "WorkloadDataFwd.hpp" - -#include "armnn/Types.hpp" -#include "armnn/Tensor.hpp" -#include "armnn/Descriptors.hpp" -#include "armnn/Exceptions.hpp" -#include "InternalTypes.hpp" -#include "OutputHandler.hpp" -#include "CpuTensorHandleFwd.hpp" - -namespace armnn -{ - -//A helper function that returns the bias data type required for given input data type. -DataType GetBiasDataType(DataType inputDataType); - -struct WorkloadInfo; - -struct QueueDescriptor -{ - std::vector<ITensorHandle*> m_Inputs; - std::vector<ITensorHandle*> m_Outputs; - - void ValidateInputsOutputs(const std::string& descName, - unsigned int numExpectedIn, unsigned int numExpectedOut) const; - - -protected: - ~QueueDescriptor() = default; - QueueDescriptor() = default; - QueueDescriptor(QueueDescriptor const&) = default; - QueueDescriptor& operator=(QueueDescriptor const&) = default; -}; - -// Base class for queue descriptors which contain parameters. -template <typename LayerDescriptor> -struct QueueDescriptorWithParameters : public QueueDescriptor -{ - LayerDescriptor m_Parameters; - -protected: - ~QueueDescriptorWithParameters() = default; - QueueDescriptorWithParameters() = default; - QueueDescriptorWithParameters(QueueDescriptorWithParameters const&) = default; - QueueDescriptorWithParameters& operator=(QueueDescriptorWithParameters const&) = default; -}; - -struct MemCopyQueueDescriptor : QueueDescriptor -{ - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -using InputQueueDescriptor = MemCopyQueueDescriptor; -using OutputQueueDescriptor = MemCopyQueueDescriptor; - -// Softmax layer workload data. 
-struct SoftmaxQueueDescriptor : QueueDescriptorWithParameters<SoftmaxDescriptor> -{ - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -// Splitter layer workload data. -struct SplitterQueueDescriptor : QueueDescriptorWithParameters<ViewsDescriptor> -{ - struct ViewOrigin - { - ViewOrigin() {} - ViewOrigin(std::vector<unsigned int> const& origin) : m_Origin(origin) {} - - //View origin (size of the vector is the same as number of dimensions of the view). - std::vector<unsigned int> m_Origin; - }; - - //View defines a tensor that will be carved from the input tensor. - //View origins are stored here, the extents are defined by sizes of the output tensors. - std::vector<ViewOrigin> m_ViewOrigins; - - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -// Merger layer workload data. -struct MergerQueueDescriptor : QueueDescriptorWithParameters<OriginsDescriptor> -{ - struct ViewOrigin - { - ViewOrigin() {} - ViewOrigin(const std::vector<unsigned int>& origin) : m_Origin(origin) {} - - //View origin (size of the vector is the same as number of dimensions of the view). - std::vector<unsigned int> m_Origin; - }; - - //View defines a sub-area of the output tensor that will be filled with the corresponding input tensor. - //View origins are stored here, the extents are defined by sizes of the input tensors. - std::vector<ViewOrigin> m_ViewOrigins; - - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -// Activation layer workload data. -struct ActivationQueueDescriptor : QueueDescriptorWithParameters<ActivationDescriptor> -{ - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -// Fully connected layer workload data. -struct FullyConnectedQueueDescriptor : QueueDescriptorWithParameters<FullyConnectedDescriptor> -{ - FullyConnectedQueueDescriptor() - : m_Weight(nullptr) - , m_Bias(nullptr) - { - } - - const ConstCpuTensorHandle* m_Weight; - const ConstCpuTensorHandle* m_Bias; - - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -// Permute layer workload data. -struct PermuteQueueDescriptor : QueueDescriptorWithParameters<PermuteDescriptor> -{ - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -// Pooling 2D layer workload data. -struct Pooling2dQueueDescriptor : QueueDescriptorWithParameters<Pooling2dDescriptor> -{ - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -// Convolution 2D layer workload data. -struct Convolution2dQueueDescriptor : QueueDescriptorWithParameters<Convolution2dDescriptor> -{ - Convolution2dQueueDescriptor() - : m_Weight(nullptr) - , m_Bias(nullptr) - { - } - - const ConstCpuTensorHandle* m_Weight; - const ConstCpuTensorHandle* m_Bias; - - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -// Depthwise Convolution 2D layer workload data. -struct DepthwiseConvolution2dQueueDescriptor : QueueDescriptorWithParameters<DepthwiseConvolution2dDescriptor> -{ - DepthwiseConvolution2dQueueDescriptor() - : m_Weight(nullptr) - , m_Bias(nullptr) - { - } - - const ConstCpuTensorHandle* m_Weight; - const ConstCpuTensorHandle* m_Bias; - - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -// Normalization layer workload data. -struct NormalizationQueueDescriptor : QueueDescriptorWithParameters<NormalizationDescriptor> -{ - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -// Add layer workload data. -struct AdditionQueueDescriptor : QueueDescriptor -{ - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -// Multiplication layer workload data. 
-struct MultiplicationQueueDescriptor : QueueDescriptor -{ - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -// Division layer workload data. -struct DivisionQueueDescriptor : QueueDescriptor -{ - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -// Subtraction layer workload data. -struct SubtractionQueueDescriptor : QueueDescriptor -{ - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -// Mean layer workload data. -struct MeanQueueDescriptor : QueueDescriptorWithParameters<MeanDescriptor> -{ - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -// Pad layer workload data -struct PadQueueDescriptor : QueueDescriptorWithParameters<PadDescriptor> -{ - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -// Batch norm layer workload data. -struct BatchNormalizationQueueDescriptor : QueueDescriptorWithParameters<BatchNormalizationDescriptor> -{ - BatchNormalizationQueueDescriptor() - : m_Mean(nullptr) - , m_Variance(nullptr) - , m_Beta(nullptr) - , m_Gamma(nullptr) - { - } - - const ConstCpuTensorHandle* m_Mean; - const ConstCpuTensorHandle* m_Variance; - const ConstCpuTensorHandle* m_Beta; - const ConstCpuTensorHandle* m_Gamma; - - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -struct ResizeBilinearQueueDescriptor : QueueDescriptorWithParameters<ResizeBilinearDescriptor> -{ - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -struct FakeQuantizationQueueDescriptor : QueueDescriptorWithParameters<FakeQuantizationDescriptor> -{ - FakeQuantizationQueueDescriptor() - : m_Min(nullptr) - , m_Max(nullptr) - { - } - - const ConstCpuTensorHandle* m_Min; - const ConstCpuTensorHandle* m_Max; - - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -struct L2NormalizationQueueDescriptor : QueueDescriptor -{ - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -struct ConstantQueueDescriptor : QueueDescriptor -{ - ConstantQueueDescriptor() - : m_LayerOutput(nullptr) - { - } - - const ConstCpuTensorHandle* m_LayerOutput; - - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -struct ReshapeQueueDescriptor : QueueDescriptorWithParameters<ReshapeDescriptor> -{ - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -struct FloorQueueDescriptor : QueueDescriptor -{ - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -struct LstmQueueDescriptor : QueueDescriptorWithParameters<LstmDescriptor> -{ - LstmQueueDescriptor() - : m_InputToInputWeights(nullptr) - , m_InputToForgetWeights(nullptr) - , m_InputToCellWeights(nullptr) - , m_InputToOutputWeights(nullptr) - , m_RecurrentToInputWeights(nullptr) - , m_RecurrentToForgetWeights(nullptr) - , m_RecurrentToCellWeights(nullptr) - , m_RecurrentToOutputWeights(nullptr) - , m_CellToInputWeights(nullptr) - , m_CellToForgetWeights(nullptr) - , m_CellToOutputWeights(nullptr) - , m_InputGateBias(nullptr) - , m_ForgetGateBias(nullptr) - , m_CellBias(nullptr) - , m_OutputGateBias(nullptr) - , m_ProjectionWeights(nullptr) - , m_ProjectionBias(nullptr) - { - } - - const ConstCpuTensorHandle* m_InputToInputWeights; - const ConstCpuTensorHandle* m_InputToForgetWeights; - const ConstCpuTensorHandle* m_InputToCellWeights; - const ConstCpuTensorHandle* m_InputToOutputWeights; - const ConstCpuTensorHandle* m_RecurrentToInputWeights; - const ConstCpuTensorHandle* m_RecurrentToForgetWeights; - const ConstCpuTensorHandle* m_RecurrentToCellWeights; - const ConstCpuTensorHandle* m_RecurrentToOutputWeights; - const ConstCpuTensorHandle* m_CellToInputWeights; - 
const ConstCpuTensorHandle* m_CellToForgetWeights; - const ConstCpuTensorHandle* m_CellToOutputWeights; - const ConstCpuTensorHandle* m_InputGateBias; - const ConstCpuTensorHandle* m_ForgetGateBias; - const ConstCpuTensorHandle* m_CellBias; - const ConstCpuTensorHandle* m_OutputGateBias; - const ConstCpuTensorHandle* m_ProjectionWeights; - const ConstCpuTensorHandle* m_ProjectionBias; - - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -struct ConvertFp16ToFp32QueueDescriptor : QueueDescriptor -{ - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -struct ConvertFp32ToFp16QueueDescriptor : QueueDescriptor -{ - void Validate(const WorkloadInfo& workloadInfo) const; -}; - -} //namespace armnn diff --git a/src/armnn/backends/WorkloadDataCollector.hpp b/src/armnn/backends/WorkloadDataCollector.hpp deleted file mode 100644 index ac8c2e2ab9..0000000000 --- a/src/armnn/backends/WorkloadDataCollector.hpp +++ /dev/null @@ -1,36 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include <armnn/Tensor.hpp> - -#include <vector> - -namespace armnn -{ -class ITensorHandle; - -class WorkloadDataCollector -{ -public: - WorkloadDataCollector(std::vector<ITensorHandle*>& handles, std::vector<TensorInfo>& infos) - : m_Handles(handles) - , m_Infos(infos) - { - } - - void Push(ITensorHandle* handle, const TensorInfo& info) - { - m_Handles.push_back(handle); - m_Infos.push_back(info); - } - -private: - std::vector<ITensorHandle*>& m_Handles; - std::vector<TensorInfo>& m_Infos; -}; - - -} //namespace armnn diff --git a/src/armnn/backends/WorkloadDataFwd.hpp b/src/armnn/backends/WorkloadDataFwd.hpp deleted file mode 100644 index 9ae20e0ce1..0000000000 --- a/src/armnn/backends/WorkloadDataFwd.hpp +++ /dev/null @@ -1,27 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -namespace armnn -{ - -struct QueueDescriptor; -template <typename LayerDescriptor> -struct QueueDescriptorWithParameters; -struct SoftmaxQueueDescriptor; -struct SplitterQueueDescriptor; -struct MergerQueueDescriptor; -struct ActivationQueueDescriptor; -struct FullyConnectedQueueDescriptor; -struct PermuteQueueDescriptor; -struct Pooling2dQueueDescriptor; -struct Convolution2dQueueDescriptor; -struct NormalizationQueueDescriptor; -struct MultiplicationQueueDescriptor; -struct BatchNormalizationQueueDescriptor; -struct FakeQuantizationQueueDescriptor; -struct ReshapeQueueDescriptor; - -} // namespace armnn
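The queue descriptors declared above all follow the same assembly pattern: the layer's tensor handles go into the descriptor (usually via WorkloadDataCollector, so the handle and TensorInfo vectors stay in step), the matching TensorInfos go into a WorkloadInfo, and the workload constructor calls Validate() before anything executes. The sketch below is illustrative only; the function name and the handle/info parameters are assumed to be supplied by the caller rather than taken from this repository, and it presumes the (deleted) headers shown above.

#include "WorkloadData.hpp"          // descriptors shown above
#include "WorkloadDataCollector.hpp" // collector shown above
#include "WorkloadInfo.hpp"

// Illustrative sketch: assembling and validating an ActivationQueueDescriptor.
void PrepareActivationSketch(armnn::ITensorHandle* inputHandle,
                             const armnn::TensorInfo& inputInfo,
                             armnn::ITensorHandle* outputHandle,
                             const armnn::TensorInfo& outputInfo)
{
    armnn::ActivationQueueDescriptor descriptor;
    armnn::WorkloadInfo info;

    // The collector appends to the descriptor's handle vector and the info's
    // TensorInfo vector together, keeping the two in the same order.
    armnn::WorkloadDataCollector inputs(descriptor.m_Inputs, info.m_InputTensorInfos);
    inputs.Push(inputHandle, inputInfo);

    armnn::WorkloadDataCollector outputs(descriptor.m_Outputs, info.m_OutputTensorInfos);
    outputs.Push(outputHandle, outputInfo);

    // Throws InvalidArgumentException if, for example, the input and output shapes differ.
    descriptor.Validate(info);
}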
\ No newline at end of file diff --git a/src/armnn/backends/WorkloadFactory.cpp b/src/armnn/backends/WorkloadFactory.cpp deleted file mode 100644 index 773a8c1a18..0000000000 --- a/src/armnn/backends/WorkloadFactory.cpp +++ /dev/null @@ -1,571 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#include "WorkloadFactory.hpp" -#include "RefWorkloadFactory.hpp" -#include "NeonWorkloadFactory.hpp" -#include "ClWorkloadFactory.hpp" - -#include "armnn/Types.hpp" -#include "armnn/LayerSupport.hpp" -#include "Layer.hpp" -#include "LayersFwd.hpp" -#include "CpuTensorHandle.hpp" - -#include <boost/cast.hpp> -#include <cstring> -#include <boost/iterator/transform_iterator.hpp> - -namespace armnn -{ - -namespace -{ - const TensorInfo OverrideDataType(const TensorInfo& info, boost::optional<DataType> type) - { - if (type == boost::none) - { - return info; - } - - return TensorInfo(info.GetShape(), type.get(), info.GetQuantizationScale(), info.GetQuantizationOffset()); - } - - boost::optional<DataType> GetBiasTypeFromWeightsType(boost::optional<DataType> weightsType) - { - if (weightsType == boost::none) - { - return weightsType; - } - - switch(weightsType.get()) - { - case DataType::Float16: - case DataType::Float32: - return weightsType; - case DataType::QuantisedAsymm8: - return DataType::Signed32; - default: - BOOST_ASSERT_MSG(false, "GetBiasTypeFromWeightsType(): Unsupported data type."); - } - return boost::none; - } -} - -bool IWorkloadFactory::IsLayerSupported(Compute compute, const Layer& layer, boost::optional<DataType> dataType, - std::string& outReasonIfUnsupported) -{ - constexpr size_t reasonCapacity = 1024; - char reason[reasonCapacity]; - bool result; - switch(layer.GetType()) - { - case LayerType::Activation: - { - auto cLayer = boost::polymorphic_downcast<const ActivationLayer*>(&layer); - const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); - result = IsActivationSupported(compute, - OverrideDataType(input, dataType), - OverrideDataType(output, dataType), - cLayer->GetParameters(), - reason, - reasonCapacity); - break; - } - case LayerType::Addition: - { - const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo(); - const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); - result = IsAdditionSupported(compute, - OverrideDataType(input0, dataType), - OverrideDataType(input1, dataType), - OverrideDataType(output, dataType), - reason, - reasonCapacity); - break; - } - case LayerType::BatchNormalization: - { - auto cLayer = boost::polymorphic_downcast<const BatchNormalizationLayer*>(&layer); - const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); - const TensorInfo& mean = cLayer->m_Mean->GetTensorInfo(); - const TensorInfo& var = cLayer->m_Variance->GetTensorInfo(); - const TensorInfo& beta = cLayer->m_Beta->GetTensorInfo(); - const TensorInfo& gamma = cLayer->m_Gamma->GetTensorInfo(); - result = IsBatchNormalizationSupported(compute, - OverrideDataType(input, dataType), - OverrideDataType(output, dataType), - OverrideDataType(mean, dataType), - OverrideDataType(var, dataType), - OverrideDataType(beta, dataType), - OverrideDataType(gamma, dataType), - cLayer->GetParameters(), - reason, reasonCapacity); - break; - } - case 
LayerType::Constant: - { - const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); - result = IsConstantSupported(compute, OverrideDataType(output, dataType), reason, reasonCapacity); - break; - } - case LayerType::ConvertFp16ToFp32: - { - const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); - result = IsConvertFp16ToFp32Supported(compute, input, output, reason, reasonCapacity); - break; - } - case LayerType::ConvertFp32ToFp16: - { - const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); - result = IsConvertFp32ToFp16Supported(compute, input, output, reason, reasonCapacity); - break; - } - case LayerType::Convolution2d: - { - auto cLayer = boost::polymorphic_downcast<const Convolution2dLayer*>(&layer); - - const TensorInfo input = OverrideDataType(layer.GetInputSlot(0).GetConnection()->GetTensorInfo(), - dataType); - const TensorInfo output = OverrideDataType(layer.GetOutputSlot(0).GetTensorInfo(), dataType); - BOOST_ASSERT(cLayer->m_Weight.get() != nullptr); - - const Convolution2dDescriptor& descriptor = cLayer->GetParameters(); - - // Construct optional biases object based on the value of m_BiasEnabled - boost::optional<TensorInfo> biases(boost::none); - if (descriptor.m_BiasEnabled) - { - biases = boost::make_optional( - OverrideDataType(cLayer->m_Bias->GetTensorInfo(), GetBiasTypeFromWeightsType(dataType))); - } - - result = IsConvolution2dSupported(compute, - input, - output, - descriptor, - OverrideDataType(cLayer->m_Weight->GetTensorInfo(), dataType), - biases, - reason, - reasonCapacity); - break; - } - case LayerType::MemCopy: - { - // MemCopy supported for CpuRef, CpuAcc and GpuAcc backends, - // (also treat Undefined as CpuRef to avoid breaking lots of Unit tests). 
- result = compute == Compute::CpuRef || compute == Compute::Undefined - || compute == Compute::CpuAcc || compute == Compute::GpuAcc; - strcpy(reason, "Unsupported backend type"); - break; - } - case LayerType::DepthwiseConvolution2d: - { - auto cLayer = boost::polymorphic_downcast<const DepthwiseConvolution2dLayer*>(&layer); - const TensorInfo& input = OverrideDataType(layer.GetInputSlot(0).GetConnection()->GetTensorInfo(), - dataType); - const TensorInfo& output = OverrideDataType(layer.GetOutputSlot(0).GetTensorInfo(), dataType); - BOOST_ASSERT(cLayer->m_Weight.get() != nullptr); - - const DepthwiseConvolution2dDescriptor& descriptor = cLayer->GetParameters(); - - // Construct optional biases object based on the value of m_BiasEnabled - boost::optional<TensorInfo> biases(boost::none); - if (descriptor.m_BiasEnabled) - { - biases = boost::make_optional( - OverrideDataType(cLayer->m_Bias->GetTensorInfo(), GetBiasTypeFromWeightsType(dataType))); - } - - result = IsDepthwiseConvolutionSupported(compute, - input, - output, - descriptor, - OverrideDataType(cLayer->m_Weight->GetTensorInfo(), dataType), - biases, - reason, - reasonCapacity); - break; - } - case LayerType::FakeQuantization: - { - auto cLayer = boost::polymorphic_downcast<const FakeQuantizationLayer*>(&layer); - const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - result = IsFakeQuantizationSupported(compute, OverrideDataType(input, dataType), cLayer->GetParameters(), - reason, reasonCapacity); - break; - } - case LayerType::Floor: - { - const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); - result = IsFloorSupported(compute, OverrideDataType(input, dataType), OverrideDataType(output, dataType), - reason, reasonCapacity); - break; - } - case LayerType::FullyConnected: - { - auto cLayer = boost::polymorphic_downcast<const FullyConnectedLayer*>(&layer); - const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); - BOOST_ASSERT(cLayer->m_Weight.get() != nullptr); - - TensorInfo biasInfo; - const TensorInfo * biasInfoPtr = nullptr; - static const TensorInfo dummyFloat16Bias(TensorShape({1,1,1,1}), DataType::Float16); - static const TensorInfo dummyFloat32Bias(TensorShape({1,1,1,1}), DataType::Float32); - static const TensorInfo dummyQA8Bias(TensorShape({1,1,1,1}), DataType::Signed32); - - const FullyConnectedDescriptor& descriptor = cLayer->GetParameters(); - if (descriptor.m_BiasEnabled) - { - BOOST_ASSERT(cLayer->m_Bias.get() != nullptr); - biasInfo = OverrideDataType(cLayer->m_Bias->GetTensorInfo(), GetBiasTypeFromWeightsType(dataType)); - biasInfoPtr = &biasInfo; - } - else - { - // If biases are not enabled pass a dummy tensorinfo for the validation - switch(input.GetDataType()) - { - case DataType::Float16: - { - biasInfoPtr = &dummyFloat16Bias; - break; - } - case DataType::Float32: - { - biasInfoPtr = &dummyFloat32Bias; - break; - } - case DataType::QuantisedAsymm8: - { - biasInfoPtr = &dummyQA8Bias; - break; - } - default: - { - BOOST_ASSERT_MSG(false, "Unexpected bias type"); - } - } - } - - result = IsFullyConnectedSupported(compute, - OverrideDataType(input, dataType), - OverrideDataType(output, dataType), - OverrideDataType(cLayer->m_Weight->GetTensorInfo(), dataType), - *biasInfoPtr, - descriptor, - reason, - reasonCapacity); - break; - } - case LayerType::Input: - { - const TensorInfo& input = 
layer.GetOutputSlot(0).GetTensorInfo(); - result = IsInputSupported(compute, OverrideDataType(input, dataType), reason, reasonCapacity); - break; - } - case LayerType::L2Normalization: - { - const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); - result = IsL2NormalizationSupported(compute, OverrideDataType(input, dataType), - OverrideDataType(output, dataType), reason, reasonCapacity); - break; - } - case LayerType::Lstm: - { - auto cLayer = boost::polymorphic_downcast<const LstmLayer*>(&layer); - const LstmDescriptor& descriptor = cLayer->GetParameters(); - - // All inputs. - const TensorInfo& input = OverrideDataType(layer.GetInputSlot(0).GetConnection()->GetTensorInfo(), - dataType); - const TensorInfo& outputStateIn = OverrideDataType(layer.GetInputSlot(1).GetConnection()->GetTensorInfo(), - dataType); - const TensorInfo& cellStateIn = OverrideDataType(layer.GetInputSlot(2).GetConnection()->GetTensorInfo(), - dataType); - // All outputs - const TensorInfo& scratchBuffer = OverrideDataType(layer.GetOutputSlot(0).GetTensorInfo(), dataType); - const TensorInfo& outputStateOut = OverrideDataType(layer.GetOutputSlot(1).GetTensorInfo(), dataType); - const TensorInfo& cellStateOut = OverrideDataType(layer.GetOutputSlot(2).GetTensorInfo(), dataType); - const TensorInfo& output = OverrideDataType(layer.GetOutputSlot(3).GetTensorInfo(), dataType); - - // Basic parameters - const TensorInfo& inputToForgetWeights - = OverrideDataType(cLayer->m_BasicParameters.m_InputToForgetWeights->GetTensorInfo(), dataType); - const TensorInfo& inputToCellWeights - = OverrideDataType(cLayer->m_BasicParameters.m_InputToCellWeights->GetTensorInfo(), dataType); - const TensorInfo& inputToOutputWeights - = OverrideDataType(cLayer->m_BasicParameters.m_InputToOutputWeights->GetTensorInfo(), dataType); - const TensorInfo& recurrentToForgetWeights - = OverrideDataType(cLayer->m_BasicParameters.m_RecurrentToForgetWeights->GetTensorInfo(), dataType); - const TensorInfo& recurrentToCellWeights - = OverrideDataType(cLayer->m_BasicParameters.m_RecurrentToCellWeights->GetTensorInfo(), dataType); - const TensorInfo& recurrentToOutputWeights - = OverrideDataType(cLayer->m_BasicParameters.m_RecurrentToOutputWeights->GetTensorInfo(), dataType); - const TensorInfo& forgetGateBias - = OverrideDataType(cLayer->m_BasicParameters.m_ForgetGateBias->GetTensorInfo(), dataType); - const TensorInfo& cellBias - = OverrideDataType(cLayer->m_BasicParameters.m_CellBias->GetTensorInfo(), dataType); - const TensorInfo& outputGateBias - = OverrideDataType(cLayer->m_BasicParameters.m_OutputGateBias->GetTensorInfo(), dataType); - - // Optional parameters - const TensorInfo* inputToInputWeights = nullptr; - const TensorInfo* recurrentToInputWeights = nullptr; - const TensorInfo* cellToInputWeights = nullptr; - const TensorInfo* inputGateBias = nullptr; - const TensorInfo* projectionWeights = nullptr; - const TensorInfo* projectionBias = nullptr; - const TensorInfo* cellToForgetWeights = nullptr; - const TensorInfo* cellToOutputWeights = nullptr; - - TensorInfo optInputToInputWeights; - TensorInfo optRecurrentToInputWeights; - TensorInfo optCellToInputWeights; - TensorInfo optInputGateBias; - TensorInfo optProjectionWeights; - TensorInfo optProjectionBias; - TensorInfo optCellToForgetWeights; - TensorInfo optCellToOutputWeights; - - if(!descriptor.m_CifgEnabled) - { - optInputToInputWeights = - 
OverrideDataType(cLayer->m_CifgParameters.m_InputToInputWeights->GetTensorInfo(), dataType); - inputToInputWeights = &optInputToInputWeights; - - optRecurrentToInputWeights = - OverrideDataType(cLayer->m_CifgParameters.m_RecurrentToInputWeights->GetTensorInfo(), dataType); - recurrentToInputWeights = &optRecurrentToInputWeights; - if (cLayer->m_CifgParameters.m_CellToInputWeights != nullptr) - { - optCellToInputWeights = - OverrideDataType(cLayer->m_CifgParameters.m_CellToInputWeights->GetTensorInfo(), dataType); - cellToInputWeights = &optCellToInputWeights; - } - optInputGateBias = - OverrideDataType(cLayer->m_CifgParameters.m_InputGateBias->GetTensorInfo(), dataType); - inputGateBias = &optInputGateBias; - } - - if(descriptor.m_ProjectionEnabled) - { - optProjectionWeights = - OverrideDataType(cLayer->m_ProjectionParameters.m_ProjectionWeights->GetTensorInfo(), dataType); - projectionWeights = &optProjectionWeights; - if (cLayer->m_ProjectionParameters.m_ProjectionBias != nullptr) - { - optProjectionBias = - OverrideDataType(cLayer->m_ProjectionParameters.m_ProjectionBias->GetTensorInfo(), dataType); - projectionBias = &optProjectionBias; - } - } - - if(descriptor.m_PeepholeEnabled) - { - optCellToForgetWeights = - OverrideDataType(cLayer->m_PeepholeParameters.m_CellToForgetWeights->GetTensorInfo(), dataType); - cellToForgetWeights = &optCellToForgetWeights; - optCellToOutputWeights = - OverrideDataType(cLayer->m_PeepholeParameters.m_CellToOutputWeights->GetTensorInfo(), dataType); - cellToOutputWeights = &optCellToOutputWeights; - } - - result = IsLstmSupported(compute, - input, - outputStateIn, - cellStateIn, - scratchBuffer, - outputStateOut, - cellStateOut, - output, - descriptor, - inputToForgetWeights, - inputToCellWeights, - inputToOutputWeights, - recurrentToForgetWeights, - recurrentToCellWeights, - recurrentToOutputWeights, - forgetGateBias, - cellBias, - outputGateBias, - inputToInputWeights, - recurrentToInputWeights, - cellToInputWeights, - inputGateBias, - projectionWeights, - projectionBias, - cellToForgetWeights, - cellToOutputWeights, - reason, - reasonCapacity); - break; - } - case LayerType::Merger: - { - auto cLayer = boost::polymorphic_downcast<const MergerLayer*>(&layer); - - // Get vector of all inputs. 
- auto getTensorInfo = [&dataType](const InputSlot& slot) - { - return OverrideDataType(slot.GetConnectedOutputSlot()->GetTensorInfo(), dataType); - }; - auto beginI = boost::make_transform_iterator(layer.GetInputSlots().begin(), getTensorInfo); - auto endI = boost::make_transform_iterator(layer.GetInputSlots().end(), getTensorInfo); - std::vector<TensorInfo> inputs(beginI, endI); - - auto getTensorInfoPtr = [](const TensorInfo& info) - { - return &info; - }; - auto beginPtr = boost::make_transform_iterator(inputs.begin(), getTensorInfoPtr); - auto endPtr = boost::make_transform_iterator(inputs.end(), getTensorInfoPtr); - std::vector<const TensorInfo*> inputPtrs(beginPtr, endPtr); - - result = IsMergerSupported(compute, inputPtrs, cLayer->GetParameters(), reason, reasonCapacity); - break; - } - case LayerType::Multiplication: - { - const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo(); - const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); - result = IsMultiplicationSupported(compute, - OverrideDataType(input0, dataType), - OverrideDataType(input1, dataType), - OverrideDataType(output, dataType), - reason, - reasonCapacity); - break; - } - case LayerType::Normalization: - { - auto cLayer = boost::polymorphic_downcast<const NormalizationLayer*>(&layer); - const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); - result = IsNormalizationSupported(compute, OverrideDataType(input, dataType), - OverrideDataType(output, dataType), cLayer->GetParameters(), reason, - reasonCapacity); - break; - } - case LayerType::Output: - { - const TensorInfo& output = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - result = IsOutputSupported(compute, OverrideDataType(output, dataType), reason, reasonCapacity); - break; - } - case LayerType::Permute: - { - auto cLayer = boost::polymorphic_downcast<const PermuteLayer*>(&layer); - const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); - result = IsPermuteSupported(compute, OverrideDataType(input, dataType), OverrideDataType(output, dataType), - cLayer->GetParameters(), reason, reasonCapacity); - break; - } - case LayerType::Pooling2d: - { - auto cLayer = boost::polymorphic_downcast<const Pooling2dLayer*>(&layer); - const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); - result = IsPooling2dSupported(compute, OverrideDataType(input, dataType), - OverrideDataType(output, dataType), cLayer->GetParameters(), reason, - reasonCapacity); - break; - } - case LayerType::Division: - { - const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo(); - const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); - result = IsDivisionSupported(compute, - OverrideDataType(input0, dataType), - OverrideDataType(input1, dataType), - OverrideDataType(output, dataType), - reason, - reasonCapacity); - break; - } - case LayerType::Reshape: - { - const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - result = IsReshapeSupported(compute, OverrideDataType(input, dataType), reason, reasonCapacity); - break; - } - case 
LayerType::ResizeBilinear: - { - const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - result = IsResizeBilinearSupported(compute, OverrideDataType(input, dataType), reason, reasonCapacity); - break; - } - case LayerType::Softmax: - { - auto cLayer = boost::polymorphic_downcast<const SoftmaxLayer*>(&layer); - const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); - result = IsSoftmaxSupported(compute, OverrideDataType(input, dataType), OverrideDataType(output, dataType), - cLayer->GetParameters(), reason, reasonCapacity); - break; - } - case LayerType::Splitter: - { - auto cLayer = boost::polymorphic_downcast<const SplitterLayer*>(&layer); - const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - result = IsSplitterSupported(compute, OverrideDataType(input, dataType), cLayer->GetParameters(), reason, - reasonCapacity); - break; - } - case LayerType::Subtraction: - { - const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo(); - const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); - result = IsSubtractionSupported(compute, - OverrideDataType(input0, dataType), - OverrideDataType(input1, dataType), - OverrideDataType(output, dataType), - reason, - reasonCapacity); - break; - } - case LayerType::Mean: - { - auto cLayer = boost::polymorphic_downcast<const MeanLayer*>(&layer); - const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); - result = IsMeanSupported(compute, - OverrideDataType(input, dataType), - OverrideDataType(output, dataType), - cLayer->GetParameters(), - reason, - reasonCapacity); - break; - } - default: - { - BOOST_ASSERT_MSG(false, "WorkloadFactory did not recognise type of layer."); - strcpy(reason, "Unrecognised layer type"); - result = false; - break; - } - } - outReasonIfUnsupported = reason; - return result; -} - -bool IWorkloadFactory::IsLayerSupported(const Layer& layer, boost::optional<DataType> dataType, - std::string& outReasonIfUnsupported) -{ - return IsLayerSupported(layer.GetComputeDevice(), layer, dataType, outReasonIfUnsupported); -} - -} diff --git a/src/armnn/backends/WorkloadFactory.hpp b/src/armnn/backends/WorkloadFactory.hpp deleted file mode 100644 index fbc6134574..0000000000 --- a/src/armnn/backends/WorkloadFactory.hpp +++ /dev/null @@ -1,137 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include "Workload.hpp" -#include <memory> -#include "armnn/TensorFwd.hpp" -#include "OutputHandler.hpp" -#include <boost/optional.hpp> - -namespace armnn -{ - -class Layer; - -// Workload factory interface for compute backends. -class IWorkloadFactory -{ -public: - virtual ~IWorkloadFactory() { } - - virtual Compute GetCompute() const = 0; - - /// Informs the memory manager that the network is finalized and ready for execution. 
- virtual void Finalize() { } - - /// Inform the memory manager to release the memory - virtual void Release() { } - - /// Inform the memory manager to acquire memory - virtual void Acquire() { } - - static bool IsLayerSupported(Compute compute, const Layer& layer, boost::optional<DataType> dataType, - std::string& outReasonIfUnsupported); - static bool IsLayerSupported(const Layer& layer, boost::optional<DataType> dataType, - std::string& outReasonIfUnsupported); - - virtual bool SupportsSubTensors() const = 0; - - virtual std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent, - TensorShape const& subTensorShape, - unsigned int const* subTensorOrigin - ) const = 0; - - virtual std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const = 0; - - virtual std::unique_ptr<IWorkload> CreateOutput(const OutputQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateActivation(const ActivationQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateSplitter(const SplitterQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateMerger(const MergerQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreatePermute(const PermuteQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateDepthwiseConvolution2d( - const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateNormalization(const NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateAddition(const AdditionQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> 
CreateConstant(const ConstantQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateLstm(const LstmQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateDivision(const DivisionQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateSubtraction(const SubtractionQueueDescriptor& descriptor, - const WorkloadInfo& info) const = 0; - - virtual std::unique_ptr<IWorkload> CreateMean(const MeanQueueDescriptor& descriptor, - const WorkloadInfo& Info) const = 0; - - virtual std::unique_ptr<IWorkload> CreatePad(const PadQueueDescriptor& descriptor, - const WorkloadInfo& Info) const = 0; -}; - -} //namespace armnn diff --git a/src/armnn/backends/WorkloadInfo.hpp b/src/armnn/backends/WorkloadInfo.hpp deleted file mode 100644 index 304bc0bf06..0000000000 --- a/src/armnn/backends/WorkloadInfo.hpp +++ /dev/null @@ -1,18 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -namespace armnn -{ - -/// Contains information about inputs and outputs to a layer. -/// This is needed at construction of workloads, but are not stored. -struct WorkloadInfo -{ - std::vector<TensorInfo> m_InputTensorInfos; - std::vector<TensorInfo> m_OutputTensorInfos; -}; - -} //namespace armnn diff --git a/src/armnn/backends/WorkloadUtils.hpp b/src/armnn/backends/WorkloadUtils.hpp deleted file mode 100644 index 65c58eabd9..0000000000 --- a/src/armnn/backends/WorkloadUtils.hpp +++ /dev/null @@ -1,139 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "armnn/Tensor.hpp" -#include "ITensorHandle.hpp" - -#include <boost/cast.hpp> - -namespace armnn -{ -namespace -{ -template<typename ArrayType, typename Arg> -void AssignValues(unsigned int num, unsigned int& idx, const ArrayType& array, Arg& arg) -{ - if (idx >= num) - { - return; - } - - arg = array[(num - 1) - idx]; - idx++; -}; - -template<typename T, typename ArrayType, typename ...Args> -void AssignValues(unsigned int num, unsigned int idx, const ArrayType& array, T& assignee, Args& ... 
args) -{ - AssignValues(num, idx, array, assignee); - - AssignValues(num, idx, array, args...); -} -} // namespace - -template<typename CopyFunc> -void CopyTensorContentsGeneric(const ITensorHandle* srcTensor, ITensorHandle* dstTensor, CopyFunc copy) -{ - static_assert(MaxNumOfTensorDimensions == 4, "Please update CopyTensorContents"); - - TensorShape srcStrides = srcTensor->GetStrides(); - const TensorShape& srcShape = srcTensor->GetShape(); - TensorShape dstStrides = dstTensor->GetStrides(); - const TensorShape& dstShape = dstTensor->GetShape(); - - size_t srcBatches = 1; - size_t srcChannels = 1; - size_t srcHeight = 1; - size_t srcWidth = 1; - AssignValues(srcShape.GetNumDimensions(),0, srcShape, - srcWidth, - srcHeight, - srcChannels, - srcBatches); - - size_t srcBatchStride = 0; - size_t srcChannelStride = 0; - size_t srcHeightStride = 0; - size_t srcWidthStride = 0; - AssignValues(srcStrides.GetNumDimensions(),0, srcStrides, - srcWidthStride, - srcHeightStride, - srcChannelStride, - srcBatchStride); - - size_t dstBatches = 1; - size_t dstChannels = 1; - size_t dstHeight = 1; - size_t dstWidth = 1; - AssignValues(dstShape.GetNumDimensions(),0, dstShape, - dstWidth, - dstHeight, - dstChannels, - dstBatches); - - size_t dstBatchStride = 0; - size_t dstChannelStride = 0; - size_t dstHeightStride = 0; - size_t dstWidthStride = 0; - AssignValues(dstStrides.GetNumDimensions(),0, dstStrides, - dstWidthStride, - dstHeightStride, - dstChannelStride, - dstBatchStride); - - auto srcData = static_cast<const uint8_t*>(srcTensor->Map()); - auto dstData = static_cast<uint8_t*>(dstTensor->Map()); - - size_t copyLength = std::min(srcWidth*srcWidthStride, dstWidth*dstWidthStride); - size_t copyHeight = std::min(srcHeight, dstHeight); - size_t copyChannels = std::min(srcChannels, dstChannels); - size_t copyBatches = std::min(srcBatches, dstBatches); - - for(unsigned int b=0; b < copyBatches; ++b) - { - auto srcPtrBatch = srcData; - auto dstPtrBatch = dstData; - for (unsigned int c=0; c< copyChannels; ++c) - { - auto srcPtrChannel = srcData; - auto dstPtrChannel = dstData; - for (unsigned int h=0; h < copyHeight; ++h) - { - copy(dstData, srcData, copyLength); - dstData += dstHeightStride; - srcData += srcHeightStride; - } - dstData += (static_cast<long>(dstChannelStride) - (dstData - dstPtrChannel)); - srcData += (static_cast<long>(srcChannelStride) - (srcData - srcPtrChannel)); - } - dstData += (static_cast<long>(dstBatchStride)-(dstData - dstPtrBatch)); - srcData += (static_cast<long>(srcBatchStride)-(srcData - srcPtrBatch)); - } - - srcTensor->Unmap(); - dstTensor->Unmap(); -} - -template <typename SrcTensorHandleType, typename DstTensorHandleType, typename DescriptorType> -void GatherTensorHandlePairs(const DescriptorType& descriptor, - std::vector<std::pair<SrcTensorHandleType*, DstTensorHandleType*>>& tensorHandlePairs) -{ - const unsigned int numInputs = static_cast<unsigned int>(descriptor.m_Inputs.size()); - tensorHandlePairs.reserve(numInputs); - - for (unsigned int i = 0; i < numInputs; ++i) - { - SrcTensorHandleType* const srcTensorHandle = boost::polymorphic_downcast<SrcTensorHandleType*>( - descriptor.m_Inputs[i]); - DstTensorHandleType* const dstTensorHandle = boost::polymorphic_downcast<DstTensorHandleType*>( - descriptor.m_Outputs[i]); - - tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle); - } -} - -} //namespace armnn
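[Editor's note, not part of the original diff] The CopyTensorContentsGeneric helper removed above walks the overlapping batch/channel/height extents of two (up to 4-D) tensors and hands each contiguous row to a caller-supplied copy functor. The standalone sketch below illustrates the same stride-walking idea with std::memcpy as the functor; the SimpleTensor type and absolute-offset indexing are simplifications for illustration only and are not the ITensorHandle API used by the deleted code.

// Simplified illustration of the row-wise copy pattern used by
// CopyTensorContentsGeneric. Shapes and strides are hypothetical;
// the real code derives them from ITensorHandle::GetShape()/GetStrides().
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

struct SimpleTensor
{
    // NCHW extents, plus per-dimension strides expressed in bytes.
    size_t batches, channels, height, width;
    size_t batchStride, channelStride, heightStride, widthStride;
    std::vector<uint8_t> data;
};

void CopyRows(const SimpleTensor& src, SimpleTensor& dst)
{
    // Copy only the region both tensors can hold, one contiguous row at a time.
    const size_t copyLength   = std::min(src.width * src.widthStride, dst.width * dst.widthStride);
    const size_t copyHeight   = std::min(src.height,   dst.height);
    const size_t copyChannels = std::min(src.channels, dst.channels);
    const size_t copyBatches  = std::min(src.batches,  dst.batches);

    for (size_t b = 0; b < copyBatches; ++b)
    {
        for (size_t c = 0; c < copyChannels; ++c)
        {
            for (size_t h = 0; h < copyHeight; ++h)
            {
                const uint8_t* srcRow = src.data.data()
                    + b * src.batchStride + c * src.channelStride + h * src.heightStride;
                uint8_t* dstRow = dst.data.data()
                    + b * dst.batchStride + c * dst.channelStride + h * dst.heightStride;
                std::memcpy(dstRow, srcRow, copyLength);
            }
        }
    }
}

int main()
{
    // A densely packed 1x1x2x4 tensor of 4-byte elements (strides in bytes).
    SimpleTensor src{1, 1, 2, 4, 32, 32, 16, 4, std::vector<uint8_t>(32, 0xAB)};
    SimpleTensor dst{1, 1, 2, 4, 32, 32, 16, 4, std::vector<uint8_t>(32, 0x00)};
    CopyRows(src, dst);
    std::cout << (src.data == dst.data ? "copied" : "mismatch") << "\n";
}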
\ No newline at end of file diff --git a/src/armnn/backends/test/ActivationFixture.hpp b/src/armnn/backends/test/ActivationFixture.hpp deleted file mode 100644 index d9d4ca7470..0000000000 --- a/src/armnn/backends/test/ActivationFixture.hpp +++ /dev/null @@ -1,56 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include "TensorCopyUtils.hpp" -#include "WorkloadTestUtils.hpp" - -struct ActivationFixture -{ - ActivationFixture() - { - auto boostArrayExtents = boost::extents - [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(batchSize)] - [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(channels)] - [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(height)] - [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(width)]; - output.resize(boostArrayExtents); - outputExpected.resize(boostArrayExtents); - input.resize(boostArrayExtents); - - unsigned int inputShape[] = { batchSize, channels, height, width }; - unsigned int outputShape[] = { batchSize, channels, height, width }; - - inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); - outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); - - input = MakeRandomTensor<float, 4>(inputTensorInfo, 21453); - } - - unsigned int width = 17; - unsigned int height = 29; - unsigned int channels = 2; - unsigned int batchSize = 5; - - boost::multi_array<float, 4> output; - boost::multi_array<float, 4> outputExpected; - boost::multi_array<float, 4> input; - - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - - // Parameters used by some of the activation functions. - float a = 0.234f; - float b = -12.345f; -}; - - -struct PositiveActivationFixture : public ActivationFixture -{ - PositiveActivationFixture() - { - input = MakeRandomTensor<float, 4>(inputTensorInfo, 2342423, 0.0f, 1.0f); - } -};
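[Editor's note, not part of the original diff] ActivationFixture above fills its NCHW input with MakeRandomTensor using a fixed seed, so the same pseudo-random data is produced on every run and two workload factories can be compared element-for-element; PositiveActivationFixture narrows the range to [0, 1] for functions such as Sqrt. The sketch below shows the seeded-random-fill idea under the assumption that MakeRandomTensor behaves like a seeded uniform fill; the helper name and signature here are hypothetical and not part of Arm NN.

// Minimal sketch of deterministic random test data: a fixed seed makes the
// "random" input identical on every run, which is what lets reference and
// accelerated backends be compared exactly.
#include <cstddef>
#include <iostream>
#include <random>
#include <vector>

std::vector<float> MakeDeterministicRandomData(size_t numElements,
                                               unsigned int seed,
                                               float minVal = -10.0f,
                                               float maxVal = 10.0f)
{
    std::mt19937 generator(seed);                        // fixed seed => reproducible data
    std::uniform_real_distribution<float> dist(minVal, maxVal);
    std::vector<float> data(numElements);
    for (auto& value : data)
    {
        value = dist(generator);
    }
    return data;
}

int main()
{
    // Same seed and extents (5 x 2 x 29 x 17) => identical tensors on both runs.
    const size_t numElements = 5 * 2 * 29 * 17;
    auto runA = MakeDeterministicRandomData(numElements, 21453);
    auto runB = MakeDeterministicRandomData(numElements, 21453);
    std::cout << (runA == runB ? "reproducible" : "differs") << "\n";

    // A positive-only variant, analogous to what PositiveActivationFixture needs for Sqrt.
    auto positive = MakeDeterministicRandomData(numElements, 2342423, 0.0f, 1.0f);
    std::cout << "first positive value: " << positive.front() << "\n";
}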
\ No newline at end of file diff --git a/src/armnn/backends/test/ActivationTestImpl.hpp b/src/armnn/backends/test/ActivationTestImpl.hpp deleted file mode 100644 index a5d327c287..0000000000 --- a/src/armnn/backends/test/ActivationTestImpl.hpp +++ /dev/null @@ -1,560 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include <armnn/ArmNN.hpp> -#include <armnn/Tensor.hpp> -#include <armnn/TypesUtils.hpp> -#include <backends/WorkloadInfo.hpp> - -#include "test/TensorHelpers.hpp" -#include "QuantizeHelper.hpp" - -#include "backends/CpuTensorHandle.hpp" -#include "backends/WorkloadFactory.hpp" -#include "ActivationFixture.hpp" - -#include <algorithm> - -template<typename T> -LayerTestResult<T, 4> BoundedReLuTestCommon(armnn::IWorkloadFactory& workloadFactory, - float upperBound, float lowerBound, - float inputScale, int32_t inputOffset, float outputScale, int32_t outputOffset, - const std::vector<T>& inputData, const std::vector<T>& outputExpectedData, - unsigned int inputWidth, unsigned int inputHeight, - unsigned int inputChannels, unsigned int inputBatchSize) -{ - unsigned int outputWidth = inputWidth; - unsigned int outputHeight = inputHeight; - unsigned int outputChannels = inputChannels; - unsigned int outputBatchSize = inputBatchSize; - - armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, - armnn::GetDataType<T>()); - - armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, - armnn::GetDataType<T>()); - - if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(inputScale); - inputTensorInfo.SetQuantizationOffset(inputOffset); - - outputTensorInfo.SetQuantizationScale(outputScale); - outputTensorInfo.SetQuantizationOffset(outputOffset); - } - - LayerTestResult<T, 4> result(inputTensorInfo); - - auto input = MakeTensor<T, 4>(inputTensorInfo, inputData); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - // Setup bounded ReLu. - armnn::ActivationQueueDescriptor descriptor; - armnn::WorkloadInfo workloadInfo; - AddInputToWorkload(descriptor, workloadInfo, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(descriptor, workloadInfo, outputTensorInfo, outputHandle.get()); - - descriptor.m_Parameters.m_Function = armnn::ActivationFunction::BoundedReLu; - descriptor.m_Parameters.m_A = upperBound; - descriptor.m_Parameters.m_B = lowerBound; - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateActivation(descriptor, workloadInfo); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - - result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputExpectedData); - - return result; -} - -LayerTestResult<float, 4> BoundedReLuUpperAndLowerBoundTest(armnn::IWorkloadFactory& workloadFactory) -{ - unsigned int inputWidth = 4u; - unsigned int inputHeight = 5u; - unsigned int inputChannels = 1u; - unsigned int inputBatchSize = 1; - - std::vector<float> input = std::vector<float>{ - -2.0f, 0.1f, 0.5f, 1.25f, - 0.786f, 0.9875f, -1.5f, 0.384f, - 1.0001f, 3.5f, 7.5f, 0.896f, - 2.126f, 2.0f, 0.3f, 0.15f, - 0.999f, 1.2f, 0.89f, 6.1f, - }; - - // Calculated manually. 
- std::vector<float> output = std::vector<float>{ - -1.0f, 0.1f, 0.5f, 1.0f, - 0.786f, 0.9875f, -1.0f, 0.384f, - 1.0f, 1.0f, 1.0f, 0.896f, - 1.0f, 1.0f, 0.3f, 0.15f, - 0.999f, 1.0f, 0.89f, 1.0f, - }; - - return BoundedReLuTestCommon(workloadFactory, 1.0f, -1.0f, 1.0f, 0, 1.0f, 0, input, output, - inputWidth, inputHeight, inputChannels, inputBatchSize); -} - -LayerTestResult<float, 4> BoundedReLuUpperBoundOnlyTest(armnn::IWorkloadFactory& workloadFactory) -{ - unsigned int inputWidth = 4u; - unsigned int inputHeight = 5u; - unsigned int inputChannels = 1u; - unsigned int inputBatchSize = 1; - - std::vector<float> input = std::vector<float>{ - -1.0f, 0.1f, 0.5f, 6.25f, - 0.786f, 5.9875f, -0.5f, 0.384f, - 6.0001f, 3.5f, 7.5f, 0.896f, - 2.126f, 12.0f, 0.3f, 0.15f, - 0.999f, 1.2f, 0.89f, 6.1f, - }; - - // Calculated manually. - std::vector<float> output = std::vector<float>{ - 0.0f, 0.1f, 0.5f, 6.0f, - 0.786f, 5.9875f, 0.0f, 0.384f, - 6.0f, 3.5f, 6.0f, 0.896f, - 2.126f, 6.0f, 0.3f, 0.15f, - 0.999f, 1.2f, 0.89f, 6.0f, - }; - - return BoundedReLuTestCommon(workloadFactory, 6.0f, 0.0f, 1.0f, 0, 1.0f, 0, input, output, - inputWidth, inputHeight, inputChannels, inputBatchSize); -} - -LayerTestResult<uint8_t, 4> BoundedReLuUint8UpperBoundOnlyTest(armnn::IWorkloadFactory& workloadFactory) -{ - unsigned int inputWidth = 3u; - unsigned int inputHeight = 2u; - unsigned int inputChannels = 1u; - unsigned int inputBatchSize = 1; - - std::vector<uint8_t> input = std::vector<uint8_t>{ - 51, 124, 28, - 251, 8, 92 - }; - - // Calculated manually. - std::vector<uint8_t> output = std::vector<uint8_t>{ - 0, 122, 0, - 255, 0, 58 - }; - - float inputScale = 12.0f / 255.0f; - int32_t inputOffset = 63; - float outputScale = 6.0f / 255.0f; - int32_t outputOffset = 0; - - return BoundedReLuTestCommon(workloadFactory, 6.0f, 0.0f, - inputScale, inputOffset, outputScale, outputOffset, - input, output, - inputWidth, inputHeight, inputChannels, inputBatchSize); -} - -LayerTestResult<uint8_t, 4> BoundedReLuUint8UpperAndLowerBoundTest(armnn::IWorkloadFactory& workloadFactory) -{ - unsigned int inputWidth = 3u; - unsigned int inputHeight = 2u; - unsigned int inputChannels = 1u; - unsigned int inputBatchSize = 1; - - std::vector<uint8_t> input = std::vector<uint8_t>{ - 51, 230, 28, - 251, 8, 92 - }; - - // Calculated manually. - std::vector<uint8_t> output = std::vector<uint8_t>{ - 51, 192, 32, - 192, 32, 92 - }; - - int32_t inputOffset = 112; - float inputScale = 0.0125f; - - return BoundedReLuTestCommon(workloadFactory, 1.0f, -1.0f, - inputScale, inputOffset, inputScale, inputOffset, // Input/output scale & offset same. 
- input, output, - inputWidth, inputHeight, inputChannels, inputBatchSize); -} - -namespace -{ - -struct BoundedReLuRandomInputTestTraits -{ - constexpr static unsigned int inputHeight = 31u; - constexpr static unsigned int inputWidth = 19u; - constexpr static unsigned int inputChannels = 4u; - constexpr static unsigned int inputBatchSize = 2; - - constexpr static unsigned int outputHeight = inputHeight; - constexpr static unsigned int outputWidth = inputWidth; - constexpr static unsigned int outputChannels = inputChannels; - constexpr static unsigned int outputBatchSize = inputBatchSize; - - static armnn::TensorInfo GetInputTensorInfo() - { - return armnn::TensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, - armnn::DataType::Float32); - } - - static armnn::TensorInfo GetOutputTensorInfo() - { - return armnn::TensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, - armnn::DataType::Float32); - } -}; - -boost::multi_array<float, 4> BoundedReLuRandomInputTest(armnn::IWorkloadFactory& workloadFactory, - float lowerBound, - float upperBound, - const armnn::ActivationDescriptor& activationDescriptor) -{ - const armnn::TensorInfo inputTensorInfo = BoundedReLuRandomInputTestTraits::GetInputTensorInfo(); - const armnn::TensorInfo outputTensorInfo = BoundedReLuRandomInputTestTraits::GetOutputTensorInfo(); - - boost::multi_array<float, 4> output(GetTensorShapeAsArray<4>(outputTensorInfo)); - - // Min/max random values passed to MakeRandomTensor are purposely outside of the ReLu - // range [lowerBound, upperBound]. - auto input = MakeRandomTensor<float, 4>(inputTensorInfo, 4605828, lowerBound - 5.0f, upperBound * 2.0f); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - // Set up bounded ReLu. 
- armnn::ActivationQueueDescriptor descriptor; - armnn::WorkloadInfo workloadInfo; - AddInputToWorkload(descriptor, workloadInfo, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(descriptor, workloadInfo, outputTensorInfo, outputHandle.get()); - descriptor.m_Parameters = activationDescriptor; - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateActivation(descriptor, workloadInfo); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workload->Execute(); - - CopyDataFromITensorHandle(&output[0][0][0][0], outputHandle.get()); - - return output; -} - -} // namespace - -LayerTestResult<float, 4> CompareBoundedReLuTest(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& otherWorkloadFactory, - float upperBound, - float lowerBound) -{ - LayerTestResult<float, 4> result(BoundedReLuRandomInputTestTraits::GetOutputTensorInfo()); - - armnn::ActivationDescriptor activationDescriptor; - activationDescriptor.m_Function = armnn::ActivationFunction::BoundedReLu; - activationDescriptor.m_A = upperBound; - activationDescriptor.m_B = lowerBound; - - result.output = BoundedReLuRandomInputTest(workloadFactory, 0.0f, upperBound, activationDescriptor); - result.outputExpected = BoundedReLuRandomInputTest(otherWorkloadFactory, 0.0f, upperBound, activationDescriptor); - - return result; -} - -template<typename T> -LayerTestResult<T,4> ConstantLinearActivationTestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale = 0.0f, - int32_t qOffset = 0) -{ - unsigned int inputHeight = 20; - unsigned int inputWidth = 17; - unsigned int inputChannels = 3; - unsigned int batchSize = 5; - - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - - unsigned int shape[] = {batchSize, inputChannels, inputHeight, inputWidth}; - - inputTensorInfo = armnn::TensorInfo(4, shape, armnn::GetDataType<T>()); - outputTensorInfo = armnn::TensorInfo(4, shape, armnn::GetDataType<T>()); - - // Set quantization parameters if the requested type is a quantized type. - if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - } - - LayerTestResult<T, 4> ret(outputTensorInfo); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - // Do linear activation that should leave the tensor unchanged. - armnn::ActivationQueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - data.m_Parameters.m_A = 1.0f; - data.m_Parameters.m_B = 0.0f; - data.m_Parameters.m_Function = armnn::ActivationFunction::Linear; - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateActivation(data, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - boost::multi_array<T, 4> input = MakeRandomTensor<T, 4>(inputTensorInfo, 7123561); - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - - // Ensure output equals input. 
- ret.outputExpected = input; - - return ret; -} - -LayerTestResult<float, 4> ConstantLinearActivationTest(armnn::IWorkloadFactory& workloadFactory) -{ - return ConstantLinearActivationTestCommon<float>(workloadFactory); -} - -LayerTestResult<uint8_t, 4> ConstantLinearActivationUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return ConstantLinearActivationTestCommon<uint8_t>(workloadFactory, 4.0f, 3); -} - -template<typename T> -LayerTestResult<T, 4> SimpleActivationTest(armnn::IWorkloadFactory& workloadFactory, - armnn::ActivationFunction activationFunction, - float activationParameterA, - float activationParameterB, - float qScale, - int32_t qOffset, - const std::vector<float>& inputData, - const std::vector<float>& outputExpectedData) -{ - constexpr static unsigned int inputWidth = 16u; - constexpr static unsigned int inputHeight = 1u; - constexpr static unsigned int inputChannels = 1u; - constexpr static unsigned int inputBatchSize = 1u; - - constexpr static unsigned int outputWidth = inputWidth; - constexpr static unsigned int outputHeight = inputHeight; - constexpr static unsigned int outputChannels = inputChannels; - constexpr static unsigned int outputBatchSize = inputBatchSize; - - armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, - armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, - armnn::GetDataType<T>()); - - // Set quantization parameters if the requested type is a quantized type. - if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - } - - LayerTestResult<T, 4> result(inputTensorInfo); - - auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, inputData)); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - // Setup bounded ReLu. - armnn::ActivationQueueDescriptor descriptor; - armnn::WorkloadInfo workloadInfo; - AddInputToWorkload(descriptor, workloadInfo, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(descriptor, workloadInfo, outputTensorInfo, outputHandle.get()); - - descriptor.m_Parameters.m_Function = activationFunction; - descriptor.m_Parameters.m_A = activationParameterA; - descriptor.m_Parameters.m_B = activationParameterB; - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateActivation(descriptor, workloadInfo); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - - // Calculated manually. - result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, outputExpectedData)); - - return result; -} - -template<typename T> -LayerTestResult<T, 4> SimpleSigmoidTestCommon(armnn::IWorkloadFactory& workloadFactory, float qScale, int32_t qOffset) -{ - std::vector<float> inputData = { - -0.1f, -0.2f, -0.3f, -0.4f, - 0.1f, 0.2f, 0.3f, 0.4f, - -1.0f, -2.0f, -3.0f, -4.0f, - 1.0f, 2.0f, 3.0f, 4.0f - }; - - // Calculate output values for input. 
- auto f = [](float value) - { - return 1.0f / (1.0f + std::exp(-value)); - }; - std::vector<float> outputExpectedData(inputData.size()); - std::transform(inputData.begin(), inputData.end(), outputExpectedData.begin(), f); - - return SimpleActivationTest<T>(workloadFactory, - armnn::ActivationFunction::Sigmoid, - 0.f, - 0.f, - qScale, - qOffset, - inputData, - outputExpectedData); -} - -LayerTestResult<float, 4> SimpleSigmoidTest(armnn::IWorkloadFactory& workloadFactory) -{ - return SimpleSigmoidTestCommon<float>(workloadFactory, 0.0f, 0); -} - -LayerTestResult<uint8_t, 4> SimpleSigmoidUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return SimpleSigmoidTestCommon<uint8_t>(workloadFactory, 0.1f, 50); -} - -template<typename T> -LayerTestResult<T,4> CompareActivationTestImpl(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory, - armnn::ActivationFunction f, - unsigned int batchSize = 5, - float qScale = 0.0f, - int32_t qOffset = 0) -{ - unsigned int width = 17; - unsigned int height = 29; - unsigned int channels = 2; - - float a = 0.234f; - float b = -12.345f; - - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - - unsigned int shape[] = {batchSize, channels, height, width}; - - inputTensorInfo = armnn::TensorInfo(4, shape, armnn::GetDataType<T>()); - outputTensorInfo = armnn::TensorInfo(4, shape, armnn::GetDataType<T>()); - - // Set quantization parameters if the requested type is a quantized type. - if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - } - - float minVal = -10.f; - if (f == armnn::ActivationFunction::Sqrt) - { - minVal = 0.f; - } - - boost::multi_array<T, 4> input = MakeRandomTensor<T, 4>(inputTensorInfo, 21453, minVal, 10.f); - - - LayerTestResult<T,4> ret(outputTensorInfo); - auto boostArrayExtents = boost::extents - [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(batchSize)] - [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(channels)] - [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(height)] - [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(width)]; - ret.output.resize(boostArrayExtents); - ret.outputExpected.resize(boostArrayExtents); - - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::ActivationQueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - data.m_Parameters.m_A = a; - data.m_Parameters.m_B = b; - data.m_Parameters.m_Function = f; - - armnn::ActivationQueueDescriptor refData = data; - armnn::WorkloadInfo refInfo = info; - SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); - SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateActivation(data, info); - BOOST_ASSERT(workload != 
nullptr); - std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateActivation(refData, refInfo); - BOOST_ASSERT(workloadRef != nullptr); - - inputHandle->Allocate(); - outputHandle->Allocate(); - inputHandleRef->Allocate(); - outputHandleRef->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); - - workload->Execute(); - workloadRef->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); - - return ret; -} - -LayerTestResult<float,4> CompareActivationTest(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory, - armnn::ActivationFunction f, - unsigned int batchSize) -{ - return CompareActivationTestImpl<float>(workloadFactory, refWorkloadFactory, f, batchSize); -} - -LayerTestResult<uint8_t,4> CompareActivationUint8Test(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory, - armnn::ActivationFunction f) -{ - return CompareActivationTestImpl<uint8_t>(workloadFactory, refWorkloadFactory, f, 5, 0.1f, 50); -} diff --git a/src/armnn/backends/test/ArmComputeCl.cpp b/src/armnn/backends/test/ArmComputeCl.cpp deleted file mode 100644 index 9a516b6d60..0000000000 --- a/src/armnn/backends/test/ArmComputeCl.cpp +++ /dev/null @@ -1,311 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#include <boost/test/unit_test.hpp> -#include "test/TensorHelpers.hpp" -#include "LayerTests.hpp" - -#include "backends/CpuTensorHandle.hpp" -#include "backends/ClWorkloadFactory.hpp" -#include "backends/ClWorkloads/ClWorkloadUtils.hpp" -#include "backends/RefWorkloadFactory.hpp" -#include "backends/ClLayerSupport.hpp" -#include "ActivationFixture.hpp" -#include "ClContextControlFixture.hpp" - -#include <arm_compute/core/CL/CLKernelLibrary.h> -#include <arm_compute/runtime/CL/CLScheduler.h> -#include <string> -#include <iostream> - -#include "test/UnitTests.hpp" - -BOOST_FIXTURE_TEST_SUITE(Compute_ArmComputeCl, ClContextControlFixture) -using FactoryType = armnn::ClWorkloadFactory; - -// ============================================================================ -// UNIT tests - -// Activation -ARMNN_AUTO_TEST_CASE(ConstantLinearActivation, ConstantLinearActivationTest) - -ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1, SimpleSoftmaxTest, 1.0f) -ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2, SimpleSoftmaxTest, 2.0f) -ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1Uint8, SimpleSoftmaxUint8Test, 1.0f) -ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2Uint8, SimpleSoftmaxUint8Test, 2.0f) - -ARMNN_AUTO_TEST_CASE(ReLu1Uint8, BoundedReLuUint8UpperAndLowerBoundTest) -ARMNN_AUTO_TEST_CASE(ReLu6Uint8, BoundedReLuUint8UpperBoundOnlyTest) - -// Fully Connected -ARMNN_AUTO_TEST_CASE(SimpleFullyConnected, FullyConnectedFloat32Test, false, false) -ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithBias, FullyConnectedFloat32Test, true, false) -ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithTranspose, FullyConnectedFloat32Test, false, true) -ARMNN_AUTO_TEST_CASE(FullyConnectedUint8, FullyConnectedUint8Test, false) -ARMNN_AUTO_TEST_CASE(FullyConnectedBiasedUint8, FullyConnectedUint8Test, true) - -ARMNN_AUTO_TEST_CASE(FullyConnectedLarge, FullyConnectedLargeTest, false) -ARMNN_AUTO_TEST_CASE(FullyConnectedLargeTransposed, FullyConnectedLargeTest, true) - -// Convolution -ARMNN_AUTO_TEST_CASE(SimpleConvolution1d, Convolution1dTest, 
true) - -ARMNN_AUTO_TEST_CASE(SimpleConvolution2d, SimpleConvolution2d3x5Test, true) -ARMNN_AUTO_TEST_CASE(SimpleConvolution2dSquare, SimpleConvolution2d3x3Test, true) -ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x3Uint8, SimpleConvolution2d3x3Uint8Test, true) -ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2d, SimpleConvolution2d3x5Test, false) -ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2dSquare, SimpleConvolution2d3x3Test, false) -ARMNN_AUTO_TEST_CASE(SimpleConvolution2dAsymmetricPadding, Convolution2dAsymmetricPaddingTest) - -// Depthwise Convolution -ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, true) -ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, false) -ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, true) -ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false) - -ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, true) -ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, false) - -// Softmax -BOOST_AUTO_TEST_CASE(Softmax4dSupport) -{ - const unsigned int numDimensions = 4u; - std::array<unsigned int, numDimensions> dimensionSizes; - dimensionSizes.fill(1u); - - const armnn::TensorInfo inputInfo(numDimensions, &dimensionSizes.front(), armnn::DataType::Float32); - const armnn::TensorInfo outputInfo(numDimensions, &dimensionSizes.front(), armnn::DataType::Float32); - - // 4D Softmax should be reported as unsupported on the CL backend - BOOST_TEST(!armnn::IsSoftmaxSupportedCl(inputInfo, outputInfo, armnn::SoftmaxDescriptor())); -} - -// Splitter -ARMNN_AUTO_TEST_CASE(SimpleSplitter, SplitterTest) -ARMNN_AUTO_TEST_CASE(SimpleSplitterUint8, SplitterUint8Test) - -ARMNN_AUTO_TEST_CASE(CopyViaSplitter, CopyViaSplitterTest) -ARMNN_AUTO_TEST_CASE(CopyViaSplitterUint8, CopyViaSplitterUint8Test) - -// Merger -ARMNN_AUTO_TEST_CASE(SimpleMerger, MergerTest) -ARMNN_AUTO_TEST_CASE(MergerUint8, MergerUint8Test) - -// Pooling -ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4, SimpleMaxPooling2dSize3x3Stride2x4Test, true) -ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4Uint8, SimpleMaxPooling2dSize3x3Stride2x4Uint8Test, true) - -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2d, IgnorePaddingSimpleMaxPooling2dTest) -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2dUint8, IgnorePaddingSimpleMaxPooling2dUint8Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3, IgnorePaddingMaxPooling2dSize3Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3Uint8, IgnorePaddingMaxPooling2dSize3Uint8Test) - -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2d, IgnorePaddingSimpleAveragePooling2dTest) -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dUint8, IgnorePaddingSimpleAveragePooling2dUint8Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPadding, IgnorePaddingSimpleAveragePooling2dNoPaddingTest) -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPaddingUint8, - IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3, IgnorePaddingAveragePooling2dSize3Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3Uint8, IgnorePaddingAveragePooling2dSize3Uint8Test) - -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleL2Pooling2d, IgnorePaddingSimpleL2Pooling2dTest) 
-ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingSimpleL2Pooling2dUint8, IgnorePaddingSimpleL2Pooling2dUint8Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingL2Pooling2dSize3, IgnorePaddingL2Pooling2dSize3Test) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingL2Pooling2dSize3Uint8, IgnorePaddingL2Pooling2dSize3Uint8Test) - -ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest) -ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2, - IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, - false) -ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2NoPadding, - IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, - true) -ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest) -ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test) - -ARMNN_AUTO_TEST_CASE(SimpleL2Pooling2d, SimpleL2Pooling2dTest) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_SimpleL2Pooling2dUint8, SimpleL2Pooling2dUint8Test) -ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride1, L2Pooling2dSize3Stride1Test) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride1Uint8, L2Pooling2dSize3Stride1Uint8Test) -ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride3, L2Pooling2dSize3Stride3Test) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride3Uint8, L2Pooling2dSize3Stride3Uint8Test) -ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride4, L2Pooling2dSize3Stride4Test) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride4Uint8, L2Pooling2dSize3Stride4Uint8Test) -ARMNN_AUTO_TEST_CASE(L2Pooling2dSize7, L2Pooling2dSize7Test) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize7Uint8, L2Pooling2dSize7Uint8Test) -ARMNN_AUTO_TEST_CASE(L2Pooling2dSize9, L2Pooling2dSize9Test) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize9Uint8, L2Pooling2dSize9Uint8Test) - -// Add -ARMNN_AUTO_TEST_CASE(SimpleAdd, AdditionTest) -ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest) -ARMNN_AUTO_TEST_CASE(AddBroadcast, AdditionBroadcastTest) - -ARMNN_AUTO_TEST_CASE(AdditionUint8, AdditionUint8Test) -ARMNN_AUTO_TEST_CASE(AddBroadcastUint8, AdditionBroadcastUint8Test) -ARMNN_AUTO_TEST_CASE(AddBroadcast1ElementUint8, AdditionBroadcast1ElementUint8Test) - -// Sub -ARMNN_AUTO_TEST_CASE(SimpleSub, SubtractionTest) - -// Div -ARMNN_AUTO_TEST_CASE(SimpleDivision, DivisionTest) -ARMNN_AUTO_TEST_CASE(DivisionByZero, DivisionByZeroTest) -ARMNN_AUTO_TEST_CASE(DivisionBroadcast1Element, DivisionBroadcast1ElementTest) -ARMNN_AUTO_TEST_CASE(DivisionBroadcast1DVector, DivisionBroadcast1DVectorTest) -// NOTE: quantized division is not supported by CL and not required by the -// android NN api - -// Mul -ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest) -ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1Element, MultiplicationBroadcast1ElementTest) -ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVector, MultiplicationBroadcast1DVectorTest) - -// Batch Norm -ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest) - -ARMNN_AUTO_TEST_CASE(L2Normalization1d, L2Normalization1dTest) -ARMNN_AUTO_TEST_CASE(L2Normalization2d, L2Normalization2dTest) -ARMNN_AUTO_TEST_CASE(L2Normalization3d, L2Normalization3dTest) -ARMNN_AUTO_TEST_CASE(L2Normalization4d, L2Normalization4dTest) - -// Resize Bilinear -ARMNN_AUTO_TEST_CASE(SimpleResizeBilinear, SimpleResizeBilinearTest) -ARMNN_AUTO_TEST_CASE(ResizeBilinearNop, ResizeBilinearNopTest) -ARMNN_AUTO_TEST_CASE(ResizeBilinearSqMin, ResizeBilinearSqMinTest) 
-ARMNN_AUTO_TEST_CASE(ResizeBilinearMin, ResizeBilinearMinTest) -ARMNN_AUTO_TEST_CASE(ResizeBilinearMag, ResizeBilinearMagTest) - -// Constant -ARMNN_AUTO_TEST_CASE(Constant, ConstantTest) -ARMNN_AUTO_TEST_CASE(ConstantUint8, ConstantTestUint8) - -// Concat -ARMNN_AUTO_TEST_CASE(Concatenation1d, Concatenation1dTest) -ARMNN_AUTO_TEST_CASE(Concatenation1dUint8, Concatenation1dUint8Test) - -ARMNN_AUTO_TEST_CASE(Concatenation2dDim0, Concatenation2dDim0Test) -ARMNN_AUTO_TEST_CASE(Concatenation2dDim0Uint8, Concatenation2dDim0Uint8Test) -ARMNN_AUTO_TEST_CASE(Concatenation2dDim1, Concatenation2dDim1Test) -ARMNN_AUTO_TEST_CASE(Concatenation2dDim1Uint8, Concatenation2dDim1Uint8Test) - -ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDims, Concatenation2dDim0DiffInputDimsTest) -ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDimsUint8, Concatenation2dDim0DiffInputDimsUint8Test) -ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDims, Concatenation2dDim1DiffInputDimsTest) -ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDimsUint8, Concatenation2dDim1DiffInputDimsUint8Test) - -ARMNN_AUTO_TEST_CASE(Concatenation3dDim0, Concatenation3dDim0Test) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim0Uint8, Concatenation3dDim0Uint8Test) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim1, Concatenation3dDim1Test) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim1Uint8, Concatenation3dDim1Uint8Test) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim2, Concatenation3dDim2Test) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim2Uint8, Concatenation3dDim2Uint8Test) - -ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDims, Concatenation3dDim0DiffInputDimsTest) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDimsUint8, Concatenation3dDim0DiffInputDimsUint8Test) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDims, Concatenation3dDim1DiffInputDimsTest) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDimsUint8, Concatenation3dDim1DiffInputDimsUint8Test) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDims, Concatenation3dDim2DiffInputDimsTest) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDimsUint8, Concatenation3dDim2DiffInputDimsUint8Test) - -// Floor -ARMNN_AUTO_TEST_CASE(SimpleFloor, SimpleFloorTest) - -// Reshape -ARMNN_AUTO_TEST_CASE(SimpleReshapeFloat32, SimpleReshapeFloat32Test) -ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeUint8Test) - -// Permute -ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test) -ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, SimplePermuteUint8Test) -ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet1, PermuteFloat32ValueSet1Test) -ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet2, PermuteFloat32ValueSet2Test) -ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet3, PermuteFloat32ValueSet3Test) - -// Lstm -ARMNN_AUTO_TEST_CASE(LstmLayerFloat32WithCifgWithPeepholeNoProjection, - LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest) -ARMNN_AUTO_TEST_CASE(LstmLayerFloat32NoCifgNoPeepholeNoProjection, - LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest) -ARMNN_AUTO_TEST_CASE(LstmLayerFloat32NoCifgWithPeepholeWithProjection, - LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest) - -// Convert from Float16 to Float32 -ARMNN_AUTO_TEST_CASE(SimpleConvertFp16ToFp32, SimpleConvertFp16ToFp32Test) -// Convert from Float32 to Float16 -ARMNN_AUTO_TEST_CASE(SimpleConvertFp32ToFp16, SimpleConvertFp32ToFp16Test) - -// ============================================================================ -// COMPARE tests - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareConv2dWithReference, CompareConvolution2dTest) - 
-ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareDepthwiseConv2dWithReferenceFloat32, CompareDepthwiseConvolution2dTest<float>) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareDepthwiseConv2dWithReferenceUint8, CompareDepthwiseConvolution2dTest<uint8_t>) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareNormalizationWithinWithReference, CompareNormalizationTest, - armnn::NormalizationAlgorithmChannel::Within, - armnn::NormalizationAlgorithmMethod::LocalBrightness) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareNormalizationAcrossWithReference, CompareNormalizationTest, - armnn::NormalizationAlgorithmChannel::Across, - armnn::NormalizationAlgorithmMethod::LocalBrightness) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxBeta1WithReference, CompareSoftmaxTest, 1.0f) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxBeta2WithReference, CompareSoftmaxTest, 2.0f) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxUint8, CompareSoftmaxUint8Test, 1.0f) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMaxPooling2dWithRef, ComparePooling2dTest, armnn::PoolingAlgorithm::Max) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAveragePooling2dWithRef, ComparePooling2dTest, armnn::PoolingAlgorithm::Average) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAveragePooling2dWithRefUint8, ComparePooling2dUint8Test, - armnn::PoolingAlgorithm::Average) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareL2Pooling2dWithRef, ComparePooling2dTest, armnn::PoolingAlgorithm::L2) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAddition, CompareAdditionTest) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMultiplicationWithRef, CompareMultiplicationTest) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareBatchNorm, CompareBatchNormTest) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareReLu1, CompareBoundedReLuTest, 1.0f, -1.0f) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareReLu6, CompareBoundedReLuTest, 6.0f, 0.0f) - -// ============================================================================ -// FIXTURE tests - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSigmoidActivationWithReference, ActivationFixture, - CompareActivationTest, armnn::ActivationFunction::Sigmoid, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareTanhActivationWithReference, ActivationFixture, - CompareActivationTest, armnn::ActivationFunction::TanH, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareLinearActivationWithReference, ActivationFixture, - CompareActivationTest, armnn::ActivationFunction::Linear, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareReLuActivationWithReference, ActivationFixture, - CompareActivationTest, armnn::ActivationFunction::ReLu, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareBoundedReLuActivationWithReference, ActivationFixture, - CompareActivationTest, armnn::ActivationFunction::BoundedReLu, 5u) -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareBoundedReLuActivationWithReferenceUint8, ActivationFixture, - CompareActivationUint8Test, armnn::ActivationFunction::BoundedReLu) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSoftReLuActivationWithReference, ActivationFixture, - CompareActivationTest, armnn::ActivationFunction::SoftReLu, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareLeakyReLuActivationWithReference, ActivationFixture, - CompareActivationTest, armnn::ActivationFunction::LeakyReLu, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareAbsActivationWithReference, ActivationFixture, - CompareActivationTest, armnn::ActivationFunction::Abs, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSqrtActivationWithReference, PositiveActivationFixture, - CompareActivationTest, armnn::ActivationFunction::Sqrt, 
5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSquareActivationWithReference, ActivationFixture, - CompareActivationTest, armnn::ActivationFunction::Square, 5u) - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/ArmComputeNeon.cpp b/src/armnn/backends/test/ArmComputeNeon.cpp deleted file mode 100644 index f1a2cf65bd..0000000000 --- a/src/armnn/backends/test/ArmComputeNeon.cpp +++ /dev/null @@ -1,463 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#include <boost/test/unit_test.hpp> - -#include "test/TensorHelpers.hpp" -#include "LayerTests.hpp" - -#include "backends/CpuTensorHandle.hpp" -#include "backends/NeonLayerSupport.hpp" -#include "backends/NeonWorkloadFactory.hpp" -#include "backends/RefWorkloadFactory.hpp" -#include "backends/test/TensorCopyUtils.hpp" -#include "ActivationFixture.hpp" - -#include "WorkloadTestUtils.hpp" - -#include "test/UnitTests.hpp" - -BOOST_AUTO_TEST_SUITE(Compute_ArmComputeNeon) -using FactoryType = armnn::NeonWorkloadFactory; - -// ============================================================================ -// UNIT tests - -// Convolution -ARMNN_AUTO_TEST_CASE(SimpleConvolution1d, Convolution1dTest, true) - -ARMNN_AUTO_TEST_CASE(SimpleConvolution2d, SimpleConvolution2d3x5Test, true) -ARMNN_AUTO_TEST_CASE(SimpleConvolution2dSquare, SimpleConvolution2d3x3Test, true) -ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2d, SimpleConvolution2d3x5Test, false) -ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2dSquare, SimpleConvolution2d3x3Test, false) -ARMNN_AUTO_TEST_CASE(SimpleConvolution2dAsymmetricPadding, Convolution2dAsymmetricPaddingTest) - -namespace -{ - -armnn::Convolution2dDescriptor MakeConv2dDesc(uint32_t strideX, uint32_t strideY, - uint32_t padLeft = 0, uint32_t padRight = 0, uint32_t padTop = 0, uint32_t padBottom = 0) -{ - armnn::Convolution2dDescriptor result; - result.m_StrideX = strideX; - result.m_StrideY = strideY; - result.m_PadLeft = padLeft; - result.m_PadRight = padRight; - result.m_PadTop = padTop; - result.m_PadBottom = padBottom; - result.m_BiasEnabled = true; - return result; -} - -} - -BOOST_AUTO_TEST_CASE(Conv2dUtils) -{ - // The only preferred Neon convolution is 1x1 with padding=0 and stride size {1,2,3}. 
- armnn::TensorShape shape1x1({ 1,1,1,1 }); - armnn::TensorInfo info1x1(shape1x1, armnn::DataType::Float32); - BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 1))); - BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 2))); - BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 3))); - BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(2, 1))); - BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(2, 2))); - BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(2, 3))); - BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(3, 1))); - BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(3, 2))); - BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(3, 3))); - - BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(4, 1))); - BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(4, 5))); - BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(3, 6))); - - // non zero padding is not preferred for direct convolution - BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 1, 1, 0))); - BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 1, 0, 1))); - BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 1, 1, 1))); - - // 2x2 filter not preferred for direct convolution - armnn::TensorShape shape2x2({ 1,1,2,2 }); - armnn::TensorInfo info2x2(shape2x2, armnn::DataType::Float32); - BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info2x2, MakeConv2dDesc(1, 1))); -} - -// Depthwise Convolution -ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, true) -ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, false) -ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, true) -ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false) - -ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, true) -ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, false) - -namespace -{ - -armnn::DepthwiseConvolution2dDescriptor MakeDepthwiseConv2dDesc(uint32_t strideX, uint32_t strideY, - uint32_t depthMultiplier = 1, uint32_t padLeft = 0, uint32_t padRight = 0, - uint32_t padTop = 0, uint32_t padBottom = 0) -{ - boost::ignore_unused(depthMultiplier); - - armnn::DepthwiseConvolution2dDescriptor desc; - - desc.m_PadLeft = padLeft; - desc.m_PadRight = padRight; - - desc.m_PadTop = padTop; - desc.m_PadBottom = padBottom; - desc.m_StrideX = strideX; - desc.m_StrideY = strideY; - desc.m_BiasEnabled = false; - - return desc; -} - -armnn::TensorInfo CreateOutputTensorInfo(const armnn::TensorInfo& inputInfo, - const armnn::TensorInfo& weightsInfo, - const armnn::DepthwiseConvolution2dDescriptor& descriptor, - armnn::DataType dataType) -{ - const armnn::TensorShape& inputShape = inputInfo.GetShape(); - const armnn::TensorShape& filterShape = weightsInfo.GetShape(); - - unsigned int inWidth = inputShape[3]; - unsigned int inHeight = inputShape[2]; - unsigned int inBatchSize = inputShape[0]; - - unsigned int filterWidth = filterShape[3]; - unsigned int readWidth = (inWidth + descriptor.m_PadLeft + 
descriptor.m_PadRight) - (filterWidth); - unsigned int outWidth = 1u + (readWidth / descriptor.m_StrideX); - - unsigned int filterHeight = filterShape[2]; - unsigned int readHeight = (inHeight + descriptor.m_PadTop + descriptor.m_PadBottom) - (filterHeight); - unsigned int outHeight = 1u + (readHeight / descriptor.m_StrideY); - unsigned int depthMultiplier = filterShape[0]; - - unsigned int outChannels = filterShape[1] * depthMultiplier; - unsigned int outBatchSize = inBatchSize; - - armnn::TensorShape outputShape({outBatchSize, outChannels, outHeight, outWidth}); - return armnn::TensorInfo(outputShape, dataType); -} -} - -BOOST_AUTO_TEST_CASE(DepthwiseConv2dUtils) -{ - const armnn::DataType dataType = armnn::DataType::Float32; - - armnn::TensorInfo inputInfo({1, 1, 10, 10 }, dataType); - armnn::TensorInfo outputInfo; - armnn::TensorInfo weightsInfo3x3({ 1, 1, 3, 3 }, dataType); - armnn::TensorInfo biasesInfo; - - armnn::DepthwiseConvolution2dDescriptor descriptor; - - // Strides supported: 1,2,3 - descriptor = MakeDepthwiseConv2dDesc(1, 1); - outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, - weightsInfo3x3, biasesInfo)); - - descriptor = MakeDepthwiseConv2dDesc(1, 2); - outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, - weightsInfo3x3, biasesInfo)); - - descriptor = MakeDepthwiseConv2dDesc(1, 3); - outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, - weightsInfo3x3, biasesInfo)); - - descriptor = MakeDepthwiseConv2dDesc(2, 1); - outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, - weightsInfo3x3, biasesInfo)); - - descriptor = MakeDepthwiseConv2dDesc(2, 2); - outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, - weightsInfo3x3, biasesInfo)); - - descriptor = MakeDepthwiseConv2dDesc(2, 3); - outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, - weightsInfo3x3, biasesInfo)); - - descriptor = MakeDepthwiseConv2dDesc(3, 1); - outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, - weightsInfo3x3, biasesInfo)); - - descriptor = MakeDepthwiseConv2dDesc(3, 2); - outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, - weightsInfo3x3, biasesInfo)); - - descriptor = MakeDepthwiseConv2dDesc(3, 3); - outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, - weightsInfo3x3, biasesInfo)); - - // Supported stride 4 - descriptor = MakeDepthwiseConv2dDesc(4, 1); - outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); - 
BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, - weightsInfo3x3, biasesInfo)); - - // Supported weights shape 1x1 - armnn::TensorInfo weightsInfo1x1({ 1, 1, 1, 1 }, armnn::DataType::Float32); - descriptor = MakeDepthwiseConv2dDesc(1, 1); - outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo1x1, descriptor, dataType); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, - weightsInfo1x1, biasesInfo)); - - // Supported shape 2x2 - armnn::TensorInfo weightsInfo2x2({ 1, 1, 2, 2 }, armnn::DataType::Float32); - descriptor = MakeDepthwiseConv2dDesc(1, 1); - outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo2x2, descriptor, dataType); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, - weightsInfo2x2, biasesInfo)); - - // Asymmetric padding - descriptor = MakeDepthwiseConv2dDesc(1, 1, 1, 1, 2, 1, 2); - outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, - weightsInfo3x3, biasesInfo)); -} - -// Pooling -ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4, SimpleMaxPooling2dSize3x3Stride2x4Test, true) -ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4Uint8, SimpleMaxPooling2dSize3x3Stride2x4Uint8Test, true) -ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest) -ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test) - -ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest) -ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test) - -ARMNN_AUTO_TEST_CASE(SimpleL2Pooling2d, SimpleL2Pooling2dTest) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_SimpleL2Pooling2dUint8, SimpleL2Pooling2dUint8Test) -ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride1, L2Pooling2dSize3Stride1Test) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride1Uint8, L2Pooling2dSize3Stride1Uint8Test) -ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride3, L2Pooling2dSize3Stride3Test) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride3Uint8, L2Pooling2dSize3Stride3Uint8Test) -ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride4, L2Pooling2dSize3Stride4Test) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride4Uint8, L2Pooling2dSize3Stride4Uint8Test) -ARMNN_AUTO_TEST_CASE(L2Pooling2dSize7, L2Pooling2dSize7Test) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize7Uint8, L2Pooling2dSize7Uint8Test) -ARMNN_AUTO_TEST_CASE(L2Pooling2dSize9, L2Pooling2dSize9Test) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize9Uint8, L2Pooling2dSize9Uint8Test) - -// Ignore padding values for pooling but count padding fields into the divisor -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2d, IgnorePaddingSimpleMaxPooling2dTest) -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2dUint8, IgnorePaddingSimpleMaxPooling2dUint8Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3, IgnorePaddingMaxPooling2dSize3Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3Uint8, IgnorePaddingMaxPooling2dSize3Uint8Test) - -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2d, IgnorePaddingSimpleAveragePooling2dTest) -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dUint8, IgnorePaddingSimpleAveragePooling2dUint8Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPadding, IgnorePaddingSimpleAveragePooling2dNoPaddingTest) 
-ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPaddingUint8, - IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3, IgnorePaddingAveragePooling2dSize3Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3Uint8, IgnorePaddingAveragePooling2dSize3Uint8Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2, - IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, false) -ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2NoPadding, - IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, - true) - -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleL2Pooling2d, IgnorePaddingSimpleL2Pooling2dTest) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingSimpleL2Pooling2dUint8, IgnorePaddingSimpleL2Pooling2dUint8Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingL2Pooling2dSize3, IgnorePaddingL2Pooling2dSize3Test) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingL2Pooling2dSize3Uint8, IgnorePaddingL2Pooling2dSize3Uint8Test) - -// Activation -ARMNN_AUTO_TEST_CASE(ConstantLinearActivation, ConstantLinearActivationTest) - -ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1, SimpleSoftmaxTest, 1.0f) -ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2, SimpleSoftmaxTest, 2.0f) - -ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1Uint8, SimpleSoftmaxUint8Test, 1.0f) -ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2Uint8, SimpleSoftmaxUint8Test, 2.0f) - -ARMNN_AUTO_TEST_CASE(ReLu1Uint8, BoundedReLuUint8UpperAndLowerBoundTest) -ARMNN_AUTO_TEST_CASE(ReLu6Uint8, BoundedReLuUint8UpperBoundOnlyTest) - -// Softmax -BOOST_AUTO_TEST_CASE(Softmax4dSupport) -{ - const unsigned int numDimensions = 4u; - std::array<unsigned int, numDimensions> dimensionSizes; - dimensionSizes.fill(1u); - - const armnn::TensorInfo inputInfo(numDimensions, &dimensionSizes.front(), armnn::DataType::Float32); - const armnn::TensorInfo outputInfo(numDimensions, &dimensionSizes.front(), armnn::DataType::Float32); - - // 4D Softmax should be reported as unsupported on the NEON backend - BOOST_TEST(!armnn::IsSoftmaxSupportedNeon(inputInfo, outputInfo, armnn::SoftmaxDescriptor())); -} - -// Splitter -ARMNN_AUTO_TEST_CASE(SimpleSplitter, SplitterTest) -ARMNN_AUTO_TEST_CASE(SimpleSplitterUint8, SplitterUint8Test) - -ARMNN_AUTO_TEST_CASE(CopyViaSplitter, CopyViaSplitterTest) -ARMNN_AUTO_TEST_CASE(CopyViaSplitterUint8, CopyViaSplitterUint8Test) - -// Merger -ARMNN_AUTO_TEST_CASE(SimpleMerger, MergerTest) -ARMNN_AUTO_TEST_CASE(MergerUint8, MergerUint8Test) - -// Fully Connected -ARMNN_AUTO_TEST_CASE(SimpleFullyConnected, FullyConnectedFloat32Test, false, false) -ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithBias, FullyConnectedFloat32Test, true, false) -ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithTranspose, FullyConnectedFloat32Test, false, true) -ARMNN_AUTO_TEST_CASE(FullyConnectedLarge, FullyConnectedLargeTest, false) -ARMNN_AUTO_TEST_CASE(FullyConnectedLargeTransposed, FullyConnectedLargeTest, true) - -// Add -ARMNN_AUTO_TEST_CASE(SimpleAdd, AdditionTest) -ARMNN_AUTO_TEST_CASE(AddBroadcast, AdditionBroadcastTest) -ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest) - -// Sub -ARMNN_AUTO_TEST_CASE(SimpleSub, SubtractionTest) - -// Mul -ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest) -ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1Element, MultiplicationBroadcast1ElementTest) -ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVector, MultiplicationBroadcast1DVectorTest) - -// Batch Norm -ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest) - -// Constant -ARMNN_AUTO_TEST_CASE(Constant, 
ConstantTest) -ARMNN_AUTO_TEST_CASE(ConstantUint8, ConstantTestUint8) - -// Concatenation -ARMNN_AUTO_TEST_CASE(Concatenation1d, Concatenation1dTest) -ARMNN_AUTO_TEST_CASE(Concatenation1dUint8, Concatenation1dUint8Test) - -ARMNN_AUTO_TEST_CASE(Concatenation2dDim0, Concatenation2dDim0Test) -ARMNN_AUTO_TEST_CASE(Concatenation2dDim0Uint8, Concatenation2dDim0Uint8Test) -ARMNN_AUTO_TEST_CASE(Concatenation2dDim1, Concatenation2dDim1Test) -ARMNN_AUTO_TEST_CASE(Concatenation2dDim1Uint8, Concatenation2dDim1Uint8Test) - -ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDims, Concatenation2dDim0DiffInputDimsTest) -ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDimsUint8, Concatenation2dDim0DiffInputDimsUint8Test) -ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDims, Concatenation2dDim1DiffInputDimsTest) -ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDimsUint8, Concatenation2dDim1DiffInputDimsUint8Test) - -ARMNN_AUTO_TEST_CASE(Concatenation3dDim0, Concatenation3dDim0Test) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim0Uint8, Concatenation3dDim0Uint8Test) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim1, Concatenation3dDim1Test) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim1Uint8, Concatenation3dDim1Uint8Test) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim2, Concatenation3dDim2Test) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim2Uint8, Concatenation3dDim2Uint8Test) - -ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDims, Concatenation3dDim0DiffInputDimsTest) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDimsUint8, Concatenation3dDim0DiffInputDimsUint8Test) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDims, Concatenation3dDim1DiffInputDimsTest) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDimsUint8, Concatenation3dDim1DiffInputDimsUint8Test) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDims, Concatenation3dDim2DiffInputDimsTest) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDimsUint8, Concatenation3dDim2DiffInputDimsUint8Test) - -// L2 Normalization -ARMNN_AUTO_TEST_CASE(L2Normalization1d, L2Normalization1dTest); -ARMNN_AUTO_TEST_CASE(L2Normalization2d, L2Normalization2dTest); -ARMNN_AUTO_TEST_CASE(L2Normalization3d, L2Normalization3dTest); -ARMNN_AUTO_TEST_CASE(L2Normalization4d, L2Normalization4dTest); - -// Floor -ARMNN_AUTO_TEST_CASE(SimpleFloor, SimpleFloorTest) - -// Reshape -ARMNN_AUTO_TEST_CASE(SimpleReshapeFloat32, SimpleReshapeFloat32Test) -ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeUint8Test) - -// Permute -ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test) -ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, SimplePermuteUint8Test) -ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet1, PermuteFloat32ValueSet1Test) -ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet2, PermuteFloat32ValueSet2Test) -ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet3, PermuteFloat32ValueSet3Test) - -// ============================================================================ -// COMPARE tests - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareConv2dWithReference, CompareConvolution2dTest) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareDepthwiseConv2dWithReferenceFloat32, CompareDepthwiseConvolution2dTest<float>) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareDepthwiseConv2dWithReferenceUint8, CompareDepthwiseConvolution2dTest<uint8_t>) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareNormalizationWithinWithReference, CompareNormalizationTest, - armnn::NormalizationAlgorithmChannel::Within, - armnn::NormalizationAlgorithmMethod::LocalBrightness) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareNormalizationAcrossWithReference, 
CompareNormalizationTest, - armnn::NormalizationAlgorithmChannel::Across, - armnn::NormalizationAlgorithmMethod::LocalBrightness) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMaxPooling2dWithReference, ComparePooling2dTest, armnn::PoolingAlgorithm::Max) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMaxPooling2dWithReferenceUint8, ComparePooling2dUint8Test, - armnn::PoolingAlgorithm::Max) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAveragePooling2dWithReference, ComparePooling2dTest, - armnn::PoolingAlgorithm::Average) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAveragePooling2dWithReferenceUint8, ComparePooling2dUint8Test, - armnn::PoolingAlgorithm::Average) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareL2Pooling2dWithReference, ComparePooling2dTest, armnn::PoolingAlgorithm::L2) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(UNSUPPORTED_CompareL2Pooling2dWithReferenceUint8, ComparePooling2dUint8Test, - armnn::PoolingAlgorithm::L2) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxBeta1WithReference, CompareSoftmaxTest, 1.0f) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxBeta2WithReference, CompareSoftmaxTest, 2.0f) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxUint8Beta1WithReference, CompareSoftmaxUint8Test, 1.0f) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxUint8Beta2WithReference, CompareSoftmaxUint8Test, 2.0f) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAddition, CompareAdditionTest) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMultiplicationWithReference, CompareMultiplicationTest) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareBatchNorm, CompareBatchNormTest) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(ReLu1, CompareBoundedReLuTest, 1.0f, -1.0f) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(ReLu6, CompareBoundedReLuTest, 6.0f, 0.0f) - -// ============================================================================ -// FIXTURE tests - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSigmoidActivationWithReference, ActivationFixture, - CompareActivationTest, armnn::ActivationFunction::Sigmoid, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareTanhActivationWithReference, ActivationFixture, - CompareActivationTest, armnn::ActivationFunction::TanH, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareLinearActivationWithReference, ActivationFixture, - CompareActivationTest, armnn::ActivationFunction::Linear, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareReLuActivationWithReference, ActivationFixture, - CompareActivationTest, armnn::ActivationFunction::ReLu, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareBoundedReLuActivationWithReference, ActivationFixture, - CompareActivationTest, armnn::ActivationFunction::BoundedReLu, 5u) -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareBoundedReLuActivationWithReferenceUint8, ActivationFixture, - CompareActivationUint8Test, armnn::ActivationFunction::BoundedReLu) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSoftReLuActivationWithReference, ActivationFixture, - CompareActivationTest, armnn::ActivationFunction::SoftReLu, 1u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareLeakyReLuActivationWithReference, ActivationFixture, - CompareActivationTest, armnn::ActivationFunction::LeakyReLu, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareAbsActivationWithReference, ActivationFixture, - CompareActivationTest, armnn::ActivationFunction::Abs, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSqrtActivationWithReference, PositiveActivationFixture, - CompareActivationTest, armnn::ActivationFunction::Sqrt, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSquareActivationWithReference, ActivationFixture, - 
CompareActivationTest, armnn::ActivationFunction::Square, 5u) -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/BatchNormTestImpl.hpp b/src/armnn/backends/test/BatchNormTestImpl.hpp deleted file mode 100644 index 7126db9074..0000000000 --- a/src/armnn/backends/test/BatchNormTestImpl.hpp +++ /dev/null @@ -1,112 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include <armnn/ArmNN.hpp> -#include <armnn/Tensor.hpp> -#include <backends/WorkloadInfo.hpp> - -#include "test/TensorHelpers.hpp" - -#include "backends/CpuTensorHandle.hpp" -#include "backends/WorkloadFactory.hpp" - -#include "backends/test/QuantizeHelper.hpp" - - -template<typename T> -LayerTestResult<T,4> BatchNormTestImpl(armnn::IWorkloadFactory& workloadFactory, - float qScale, - int32_t qOffset) -{ - const unsigned int width = 2; - const unsigned int height = 3; - const unsigned int channels = 2; - const unsigned int num = 1; - - armnn::TensorInfo inputTensorInfo({num, channels, height, width}, armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo({num, channels, height, width}, armnn::GetDataType<T>()); - armnn::TensorInfo tensorInfo({channels}, armnn::GetDataType<T>()); - - // Set quantization parameters if the requested type is a quantized type. - if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - tensorInfo.SetQuantizationScale(qScale); - tensorInfo.SetQuantizationOffset(qOffset); - } - - auto input = MakeTensor<T, 4>(inputTensorInfo, - QuantizedVector<T>(qScale, qOffset, - { - 1.f, 4.f, - 4.f, 2.f, - 1.f, 6.f, - - 1.f, 1.f, - 4.f, 1.f, - -2.f, 4.f - })); - // These values are per-channel of the input. 
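// The per-channel parameters defined just below, together with the usual batch normalization
// formula out = gamma * (x - mean) / sqrt(variance + eps) + beta (m_Eps is set to 0.0f in this
// test), account for the expected output used further down:
//   channel 0: mean = 3,  variance = 4, gamma = 2, beta = 3  =>  out = 2*(x - 3)/2 + 3 = x   (output equals input)
//   channel 1: mean = -2, variance = 9, gamma = 1, beta = 2  =>  out = (x + 2)/3 + 2         (e.g. 1 -> 3, 4 -> 4, -2 -> 2)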
- auto mean = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {3, -2})); - auto variance = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {4, 9})); - auto beta = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {3, 2})); - auto gamma = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {2, 1})); - LayerTestResult<T,4> ret(outputTensorInfo); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::BatchNormalizationQueueDescriptor data; - armnn::WorkloadInfo info; - armnn::ScopedCpuTensorHandle meanTensor(tensorInfo); - armnn::ScopedCpuTensorHandle varianceTensor(tensorInfo); - armnn::ScopedCpuTensorHandle betaTensor(tensorInfo); - armnn::ScopedCpuTensorHandle gammaTensor(tensorInfo); - - AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]); - AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]); - AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]); - AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]); - - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - data.m_Mean = &meanTensor; - data.m_Variance = &varianceTensor; - data.m_Beta = &betaTensor; - data.m_Gamma = &gammaTensor; - data.m_Parameters.m_Eps = 0.0f; - - // For each channel: - // substract mean, divide by standard deviation (with an epsilon to avoid div by 0), - // multiply by gamma and add beta - ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, - QuantizedVector<T>(qScale, qOffset, - { - 1.f, 4.f, - 4.f, 2.f, - 1.f, 6.f, - - 3.f, 3.f, - 4.f, 3.f, - 2.f, 4.f - })); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateBatchNormalization(data, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - - return ret; -}
\ No newline at end of file diff --git a/src/armnn/backends/test/ClContextControlFixture.hpp b/src/armnn/backends/test/ClContextControlFixture.hpp deleted file mode 100644 index 54c5a4f505..0000000000 --- a/src/armnn/backends/test/ClContextControlFixture.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/ClContextControl.hpp" - -template<bool ProfilingEnabled> -struct ClContextControlFixtureBase -{ - // Initialising ClContextControl to ensure OpenCL is loaded correctly for each test case - ClContextControlFixtureBase() : m_ClContextControl(nullptr, ProfilingEnabled) {} - ~ClContextControlFixtureBase() {} - - armnn::ClContextControl m_ClContextControl; -}; - -using ClContextControlFixture = ClContextControlFixtureBase<false>; -using ClProfilingContextControlFixture = ClContextControlFixtureBase<true>; diff --git a/src/armnn/backends/test/Conv2dTestImpl.hpp b/src/armnn/backends/test/Conv2dTestImpl.hpp deleted file mode 100644 index eb7165bf09..0000000000 --- a/src/armnn/backends/test/Conv2dTestImpl.hpp +++ /dev/null @@ -1,921 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include <armnn/ArmNN.hpp> -#include <armnn/Tensor.hpp> -#include <armnn/TypesUtils.hpp> -#include <backends/WorkloadInfo.hpp> - -#include "test/TensorHelpers.hpp" -#include "QuantizeHelper.hpp" - -#include "backends/CpuTensorHandle.hpp" -#include "backends/WorkloadFactory.hpp" - -// Mapping from input type to bias type for fully connected layers. -// float => float, uint8_t => int32_t -template<typename T> -struct FullyConnectedBiasTypeForInputType; - -template<> -struct FullyConnectedBiasTypeForInputType<float> -{ - using Type = float; -}; - -template<> -struct FullyConnectedBiasTypeForInputType<uint8_t> -{ - using Type = int32_t; -}; - -// Modifies a std::vector in-place using a specified bias. -template<typename T, typename B> -void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset, - const std::vector<B>& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h) -{ - BOOST_ASSERT_MSG((armnn::IsQuantizedType<T>() && vScale != 0.0f) || (!armnn::IsQuantizedType<T>()), - "Invalid type and parameter combination."); - BOOST_ASSERT_MSG((armnn::IsQuantizedType<B>() && bScale != 0.0f) || (!armnn::IsQuantizedType<B>()), - "Invalid type and parameter combination."); - - // Note we need to dequantize and re-quantize the image value and the bias. 
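// For quantized types this presumably follows the usual affine quantization scheme,
//   real value      = scale * (quantized - offset)
//   quantized value = round(real / scale) + offset
// so the bias is added in real-number space and the sum is mapped back to the quantized
// representation; SelectiveDequantize/SelectiveQuantize are assumed to do exactly that for
// quantized types and to pass float values through unchanged.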
- for (uint32_t i = 0; i < bias.size(); ++i) - { - float dBias = SelectiveDequantize(bias[i], bScale, bOffset); - for (uint32_t y = 0; y < h; ++y) - { - for (uint32_t x = 0; x < w; ++x) - { - uint32_t offset = (i * h + y) * w + x; - BOOST_ASSERT(offset < v.size()); - T& outRef = v[offset]; - float dOutput = SelectiveDequantize(outRef, vScale, vOffset); - outRef = SelectiveQuantize<T>(dOutput + dBias, vScale, vOffset); - } - } - } -} - -template<typename T, typename B> -LayerTestResult<T, 4> SimpleConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory, - const boost::multi_array<T, 4>& input, - const boost::multi_array<T, 4>& kernel, - const boost::multi_array<B, 1>& bias, - const boost::multi_array<T, 4>& outputExpected, - float qScale, - int32_t qOffset, - uint32_t padLeft = 0, - uint32_t padTop = 0, - uint32_t padRight = 0, - uint32_t padBottom = 0) -{ - unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]); - unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[3]); - unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]); - unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]); - - unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]); - unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]); - unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]); - unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]); - - unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]); - unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]); - unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]); - unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]); - - bool biasEnabled = bias.size() > 0; - - // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches). - BOOST_ASSERT(inputNum == 1); - BOOST_ASSERT(outputNum == 1); - - // If a bias is used, its size must equal the number of output channels. - BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels); - - - // Note these tensors will use two (identical) batches. - armnn::TensorInfo inputTensorInfo({2*inputNum, inputChannels, inputHeight, inputWidth}, armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo({2*outputNum, outputChannels, outputHeight, outputWidth}, - armnn::GetDataType<T>()); - armnn::TensorInfo kernelDesc({kernelDepthMul, kernelChannels, kernelHeight, kernelWidth}, armnn::GetDataType<T>()); - armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, armnn::GetDataType<B>()); - - // Set quantization parameters if the requested type is a quantized type. - if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - kernelDesc.SetQuantizationScale(qScale); - kernelDesc.SetQuantizationOffset(qOffset); - biasDesc.SetQuantizationScale(qScale*qScale); - biasDesc.SetQuantizationOffset(0); - } - - LayerTestResult<T, 4> ret(outputTensorInfo); - - // Construct input data - two batches of the same input image. 
- std::vector<T> inputImage; - inputImage.assign(input.data(), input.data() + 1*inputChannels*inputHeight*inputWidth); - std::vector<T> inputData; - inputData.insert(inputData.end(), inputImage.begin(), inputImage.end()); - inputData.insert(inputData.end(), inputImage.begin(), inputImage.end()); - auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData); - - std::vector<T> outputImage; - outputImage.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth); - - // Apply bias to output image if it is enabled. - if(biasEnabled) - { - std::vector<T> biasV; - biasV.assign(bias.data(), bias.data() + outputChannels); - ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), - biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), - outputWidth, outputHeight); - } - - // Construct expected output data - two identical images. - std::vector<T> outputData; - outputData.insert(outputData.end(), outputImage.begin(), outputImage.end()); - outputData.insert(outputData.end(), outputImage.begin(), outputImage.end()); - - ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData); - - // Todo: nontrivial padding and strides. - uint32_t strideX = 1; - uint32_t strideY = 1; - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::Convolution2dQueueDescriptor data; - armnn::WorkloadInfo info; - armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); - armnn::ScopedCpuTensorHandle biasTensor(biasDesc); - - AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); - - if(biasEnabled) - { - AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); - } - - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - data.m_Weight = &weightsTensor; - data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs. 
- data.m_Parameters.m_StrideX = strideX; - data.m_Parameters.m_StrideY = strideY; - data.m_Parameters.m_PadLeft = padLeft; - data.m_Parameters.m_PadRight = padRight; - data.m_Parameters.m_PadTop = padTop; - data.m_Parameters.m_PadBottom = padBottom; - data.m_Parameters.m_BiasEnabled = biasEnabled; - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info); - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - - return ret; -} - -template<typename T, typename B> -LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(armnn::IWorkloadFactory& workloadFactory, - const boost::multi_array<T, 4>& input, - const boost::multi_array<T, 4>& kernel, - const boost::multi_array<B, 1>& bias, - const boost::multi_array<T, 4>& outputExpected, - float qScale, - int32_t qOffset, - uint32_t padLeft = 0, - uint32_t padTop = 0, - uint32_t padRight = 0, - uint32_t padBottom = 0, - uint32_t strideX = 1, - uint32_t strideY = 1) -{ - unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]); - unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]); - unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]); - unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[3]); - unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]); - unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]); - unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]); - unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]); - unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]); - unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]); - unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]); - unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]); - - // If a bias is used, its size must equal the number of output channels. - bool biasEnabled = bias.size() > 0; - BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels); - - // Creates the tensors. - armnn::TensorInfo inputTensorInfo({inputNum, inputChannels, inputHeight, inputWidth}, armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo({outputNum, outputChannels, outputHeight, outputWidth}, - armnn::GetDataType<T>()); - armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, armnn::GetDataType<T>()); - armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, armnn::GetDataType<B>()); - - // Set quantization parameters if the requested type is a quantized type. - if (armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - kernelDesc.SetQuantizationScale(qScale); - kernelDesc.SetQuantizationOffset(qOffset); - biasDesc.SetQuantizationScale(qScale*qScale); - biasDesc.SetQuantizationOffset(0); - } - - // Construct the input data. 
- std::vector<T> inputData; - inputData.assign(input.data(), input.data() + inputChannels*inputHeight*inputWidth); - auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData); - - // Construct the output data, with bias applied, as appropriate. - std::vector<T> outputData; - outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth); - if (biasEnabled) - { - std::vector<T> biasV; - biasV.assign(bias.data(), bias.data() + outputChannels); - ApplyBias(outputData, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), - biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), - outputWidth, outputHeight); - } - - LayerTestResult<T, 4> ret(outputTensorInfo); - ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); - AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); - - armnn::ScopedCpuTensorHandle biasTensor(biasDesc); - if (biasEnabled) - { - AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); - } - - armnn::DepthwiseConvolution2dQueueDescriptor data; - data.m_Weight = &weightsTensor; - data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - it can be a source of bugs. - data.m_Parameters.m_StrideX = strideX; - data.m_Parameters.m_StrideY = strideY; - data.m_Parameters.m_PadLeft = padLeft; - data.m_Parameters.m_PadRight = padRight; - data.m_Parameters.m_PadTop = padTop; - data.m_Parameters.m_PadBottom = padBottom; - data.m_Parameters.m_BiasEnabled = biasEnabled; - - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info); - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - - return ret; -} - -template<typename T, typename B> -LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(armnn::IWorkloadFactory& workloadFactory, - float qScale, - int32_t qOffset, - bool biasEnabled) -{ - unsigned int inputHeight = 3; - unsigned int inputWidth = 3; - unsigned int inputChannels = 2; - unsigned int inputNum = 1; - - unsigned int kernelHeight = 3; - unsigned int kernelWidth = 3; - unsigned int kernelChannels = inputChannels; - - unsigned int outputHeight = 1; - unsigned int outputWidth = 1; - unsigned int outputChannels = kernelChannels; - unsigned int outputNum = inputNum; - - armnn::TensorInfo inputTensorInfo({ inputNum, inputChannels, inputHeight, inputWidth }, armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo({ outputNum, outputChannels, outputHeight, outputWidth }, - armnn::GetDataType<T>()); - armnn::TensorInfo kernelDesc({ 1, outputChannels, kernelHeight, kernelWidth }, armnn::GetDataType<T>()); - armnn::TensorInfo biasDesc({ outputChannels }, armnn::GetDataType<B>()); - - // Set quantization parameters if the requested type is a quantized type. 
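// Note that the bias scale below is set to qScale * qScale with a zero offset because the input
// and the weights share the same scale in these tests; the general convention (spelled out in the
// Convolution1d test further down as inputInfo.GetQuantizationScale() * kernelInfo.GetQuantizationScale())
// is that a quantized bias uses inputScale * weightScale.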
- if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - kernelDesc.SetQuantizationScale(qScale); - kernelDesc.SetQuantizationOffset(qOffset); - biasDesc.SetQuantizationScale(qScale*qScale); - biasDesc.SetQuantizationOffset(0); - } - - auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>( - QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), { - 1.f, 2.f, 1.f, - 2.f, 1.f, 2.f, - 1.f, 2.f, 1.f, - - 1.f, 2.f, 1.f, - 2.f, 1.f, 2.f, - 1.f, 2.f, 1.f, - }))); - - std::vector<B> biasV(QuantizedVector<B>(biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), - {0, 2})); - auto bias = MakeTensor<B, 1>(biasDesc, biasV); - - auto kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( - QuantizedVector<T>(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), { - 1.f, 0.f, 1.f, - 0.f, 0.f, 0.f, - -1.f, 0.f, -1.f, - - 1.f, 0.f, 1.f, - 0.f, 0.f, 0.f, - -1.f, 0.f, -1.f, - }))); - - // Manually calculated. - std::vector<T> outputImage( - QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), - outputTensorInfo.GetQuantizationOffset(), - {0.f, 0.f}) - ); - - // Optionally apply bias to output image. - if(biasEnabled) - { - ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), - biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), - outputWidth, outputHeight); - } - - LayerTestResult<T, 4> ret(outputTensorInfo); - ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::DepthwiseConvolution2dQueueDescriptor data; - armnn::WorkloadInfo info; - armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); - armnn::ScopedCpuTensorHandle biasTensor(biasDesc); - - AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); - AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); - - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - data.m_Weight = &weightsTensor; - data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled. 
- data.m_Parameters.m_StrideX = 1; - data.m_Parameters.m_StrideY = 1; - data.m_Parameters.m_PadLeft = 0; - data.m_Parameters.m_PadRight = 0; - data.m_Parameters.m_PadTop = 0; - data.m_Parameters.m_PadBottom = 0; - data.m_Parameters.m_BiasEnabled = biasEnabled; - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info); - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - - return ret; -} - -template<typename T, typename B> -LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory, - float qScale, - int32_t qOffset, - bool biasEnabled) -{ - unsigned int depthMultiplier = 2; - - unsigned int inputHeight = 8; - unsigned int inputWidth = 16; - unsigned int inputChannels = 2; - unsigned int inputBatchSize = 1; - - unsigned int kernelHeight = 5; - unsigned int kernelWidth = 3; - - unsigned int outputHeight = inputHeight - kernelHeight + 1 + 2; - unsigned int outputWidth = (inputWidth - kernelWidth + 1)/2; - unsigned int outputChannels = inputChannels * depthMultiplier; - unsigned int outputBatchSize = inputBatchSize; - - armnn::TensorInfo inputTensorInfo({inputBatchSize, inputChannels, inputHeight, inputWidth}, - armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo({outputBatchSize, outputChannels, outputHeight, outputWidth}, - armnn::GetDataType<T>()); - armnn::TensorInfo kernelDesc({depthMultiplier, inputChannels, kernelHeight, kernelWidth}, armnn::GetDataType<T>()); - armnn::TensorInfo biasDesc({outputChannels}, armnn::GetDataType<B>()); - - // Set quantization parameters if the requested type is a quantized type. 
- if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - kernelDesc.SetQuantizationScale(qScale); - kernelDesc.SetQuantizationOffset(qOffset); - biasDesc.SetQuantizationScale(qScale*qScale); - biasDesc.SetQuantizationOffset(0); - } - - auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>( - QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), { - 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, - 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, - 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, - 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, - 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, - 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }))); - - std::vector<B> biasV(QuantizedVector<B>(biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), - {0, 2, 1, -1})); - auto bias = MakeTensor<B, 1>(biasDesc, biasV); - - auto kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( - QuantizedVector<T>(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), { - 1, 1, 1, - 1, -1, 1, - 1, 1, 1, - 1, 1, 1, - 1, 1, 1, - - 2, 2, 2, - 2, 2, 2, - 2, 2, 2, - 2, 2, 2, - 2, 2, 2, - - 0, 0, 0, - 0, -1, 0, - 0, 0, 0, - 0, 0, 0, - 0, 0, 0, - - 0, 0, 0, - 0, 0, 0, - 0, 1, 0, - 0, 0, 0, - 0, 0, 0 - }))); - - // Manually calculated. 
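// The expected output shape can be checked from the parameters used further down (strideX = 2,
// strideY = 1, padTop = padBottom = 1, padLeft = padRight = 0) together with the sizes above
// (inputWidth = 16, inputHeight = 8, inputChannels = 2, kernelWidth = 3, kernelHeight = 5,
// depthMultiplier = 2):
//   outputHeight   = inputHeight - kernelHeight + 1 + padTop + padBottom = 8 - 5 + 1 + 2 = 6
//   outputWidth    = (inputWidth - kernelWidth + 1) / strideX            = (16 - 3 + 1) / 2 = 7
//   outputChannels = inputChannels * depthMultiplier                     = 2 * 2 = 4
// which matches the four 6x7 blocks of values listed here.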
- std::vector<T> outputImage = std::vector<T>( - QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), { - 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, - 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, - 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, - 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, - 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, - 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, - - -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, - -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, - -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, - -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, - - 8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f - })); - - // Optionally apply bias to output image. - if(biasEnabled) - { - ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), - biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), - outputWidth, outputHeight); - } - - LayerTestResult<T, 4> ret(outputTensorInfo); - ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::DepthwiseConvolution2dQueueDescriptor data; - armnn::WorkloadInfo info; - armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); - armnn::ScopedCpuTensorHandle biasTensor(biasDesc); - - AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); - AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); - - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - data.m_Weight = &weightsTensor; - data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled. - data.m_Parameters.m_StrideX = 2; - data.m_Parameters.m_StrideY = 1; - data.m_Parameters.m_PadLeft = 0; - data.m_Parameters.m_PadRight = 0; - data.m_Parameters.m_PadTop = 1; - data.m_Parameters.m_PadBottom = 1; - data.m_Parameters.m_BiasEnabled = biasEnabled; - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info); - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - - return ret; -} - -template<typename T> -LayerTestResult<T,4> Convolution1dTestImpl(armnn::IWorkloadFactory& workloadFactory, - float qScale, - int32_t qOffset, - bool biasEnabled) -{ - using B = typename FullyConnectedBiasTypeForInputType<T>::Type; - - // Until we have a specialist 1D convolution layer, we can fake one using - // 2D convolution with the final dimension set to 1. 
- // I don't anticipate this being particularly slow, given that convolution is implemented - // as a matrix multiplication, at which point dimension doesn't matter. - - unsigned int batchSize = 1; - unsigned int inputChannels = 2; - unsigned int outputChannels = 3; - unsigned int inputSize = 5; // The 1D size (could view as 'width' or 'height'). - unsigned int kernelSize = 3; - unsigned int padSize = 2; - unsigned int stride = 1; - unsigned int outputSize = 7; // (inputSize + 2 * padSize - kernelSize + 1) / stride. - - armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, armnn::GetDataType<T>()); - armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, armnn::GetDataType<T>()); - armnn::TensorInfo kernelInfo({outputChannels, inputChannels, kernelSize, 1}, armnn::GetDataType<T>()); - armnn::TensorInfo biasInfo({outputChannels}, armnn::GetDataType<B>()); - - // Set quantization parameters if the requested type is a quantized type. - if(armnn::IsQuantizedType<T>()) - { - inputInfo.SetQuantizationScale(qScale); - inputInfo.SetQuantizationOffset(qOffset); - outputInfo.SetQuantizationScale(qScale); - outputInfo.SetQuantizationOffset(qOffset); - kernelInfo.SetQuantizationScale(qScale); - kernelInfo.SetQuantizationOffset(qOffset); - biasInfo.SetQuantizationScale(inputInfo.GetQuantizationScale()*kernelInfo.GetQuantizationScale()); - biasInfo.SetQuantizationOffset(0); - } - - std::vector<T> inputData( - QuantizedVector<T>(inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), { - 5.0f, -2.0f, 2.5f, 0.0f, 1.0f, - -3.0f, 3.2f, 5.0f, 2.0f, 3.0f, - })); - - std::vector<T> kernelData( - QuantizedVector<T>(kernelInfo.GetQuantizationScale(), kernelInfo.GetQuantizationOffset(), { - 1.0f, 0.0f, 0.0f, - 0.0f, 2.0f, -1.5f, - - 0.0f, 0.0f, 0.0f, - 0.2f, 0.2f, 0.2f, - - 0.5f, 0.0f, 0.5f, - 0.0f, -1.0f, 0.0f - })); - - std::vector<B> biasData( - QuantizedVector<B>(biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(), { - 1.0f, 0.0f, 0.0f - })); - - std::vector<T> outputData( - QuantizedVector<T>(outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), { - 4.5f, -10.8f, 5.0f + 6.4f - 7.5f, -2.0f + 10.0f -3.0f, 2.5f + 4.0f - 4.5f, 6.0f, 1.0f, - -0.6f, -0.6f + 0.64f, -0.6f + 0.64f + 1.0f, 0.64f + 1.0f + 0.4f, 1.0f + 0.4f + 0.6f, 0.4f + 0.6f, 0.6f, - 2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f - })); - - // Optionally apply bias to output image. 
- if(biasEnabled) - { - ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), - biasData, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(), - 1, outputSize); - } - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo); - - armnn::Convolution2dQueueDescriptor data; - armnn::WorkloadInfo info; - armnn::ScopedCpuTensorHandle weightsTensor(kernelInfo); - armnn::ScopedCpuTensorHandle biasTensor(biasInfo); - - AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data()); - AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data()); - - AddInputToWorkload(data, info, inputInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputInfo, outputHandle.get()); - - data.m_Weight = &weightsTensor; - data.m_Bias = &biasTensor; - data.m_Parameters.m_StrideX = 1; - data.m_Parameters.m_StrideY = stride; - data.m_Parameters.m_PadLeft = 0; - data.m_Parameters.m_PadRight = 0; - data.m_Parameters.m_PadTop = padSize; - data.m_Parameters.m_PadBottom = padSize; - data.m_Parameters.m_BiasEnabled = biasEnabled; - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info); - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), inputData.data()); - - workloadFactory.Finalize(); - workload->Execute(); - - // Output - LayerTestResult<T,4> ret(outputInfo); - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - ret.outputExpected = MakeTensor<T, 4>(outputInfo, outputData); - return ret; -} - - - -template<typename T> -LayerTestResult<T,4> CompareConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory) -{ - unsigned int inputHeight = 8; - unsigned int inputWidth = 16; - unsigned int inputChannels = 3; - unsigned int inputNum = 5; - - unsigned int kernelHeight = 3; - unsigned int kernelWidth = 3; - - unsigned int strideX = 2; - unsigned int strideY = 3; - unsigned int padX = 1; - unsigned int padY = 1; - - unsigned int outputNum = inputNum; - unsigned int outputChannels = 2; - unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY; - unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX; - - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - armnn::TensorInfo kernelDesc; - armnn::TensorInfo biasDesc; - - unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth}; - unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth}; - unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth}; - unsigned int biasShape[] = {outputChannels}; - - inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType<T>()); - outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::GetDataType<T>()); - kernelDesc = armnn::TensorInfo(4, kernelShape, armnn::GetDataType<T>()); - biasDesc = armnn::TensorInfo(1, biasShape, armnn::GetDataType<T>()); - - LayerTestResult<T,4> ret(outputTensorInfo); - - auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908); - auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234); - auto bias = MakeRandomTensor<T, 1>(biasDesc, 1028); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - 
std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::Convolution2dQueueDescriptor data; - armnn::WorkloadInfo info; - armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); - armnn::ScopedCpuTensorHandle biasTensor(biasDesc); - - AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); - AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); - - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - data.m_Weight = &weightsTensor; - data.m_Bias = &biasTensor; - data.m_Parameters.m_StrideX = strideX; - data.m_Parameters.m_StrideY = strideY; - data.m_Parameters.m_PadLeft = padX; - data.m_Parameters.m_PadRight = padX; - data.m_Parameters.m_PadTop = padY; - data.m_Parameters.m_PadBottom = padY; - data.m_Parameters.m_BiasEnabled = true; - - std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); - - armnn::Convolution2dQueueDescriptor refData = data; - armnn::WorkloadInfo refInfo = info; - SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); - SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info); - std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateConvolution2d(refData, refInfo); - - outputHandleRef->Allocate(); - inputHandleRef->Allocate(); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - refWorkloadFactory.Finalize(); - workloadRef->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); - - return ret; -} - -template<typename T> -LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory) -{ - unsigned int inputHeight = 8; - unsigned int inputWidth = 16; - unsigned int inputChannels = 3; - unsigned int inputNum = 5; - - unsigned int kernelHeight = 3; - unsigned int kernelWidth = 3; - unsigned int channelMultiplier = 1; - - unsigned int strideX = 2; - unsigned int strideY = 3; - unsigned int padX = 1; - unsigned int padY = 1; - - unsigned int outputNum = inputNum; - unsigned int outputChannels = inputChannels * channelMultiplier; - unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY; - unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX; - - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - armnn::TensorInfo kernelDesc; - armnn::TensorInfo biasDesc; - - unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; - unsigned int outputShape[] = { outputNum, outputChannels, outputHeight, outputWidth }; - unsigned int kernelShape[] = { channelMultiplier, inputChannels, kernelHeight, kernelWidth }; - unsigned int biasShape[] = { outputChannels }; - - float inputsQScale = armnn::IsQuantizedType<T>() ? 1.0f : 0; - float outputQScale = armnn::IsQuantizedType<T>() ? 
2.0f : 0; - int32_t qOffset = 0; - - inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType<T>(), inputsQScale, qOffset); - outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::GetDataType<T>(), outputQScale, qOffset); - kernelDesc = armnn::TensorInfo(4, kernelShape, armnn::GetDataType<T>(), inputsQScale, qOffset); - biasDesc = armnn::TensorInfo(1, biasShape, armnn::GetBiasDataType(armnn::GetDataType<T>()), inputsQScale, qOffset); - - LayerTestResult<T, 4> ret(outputTensorInfo); - - auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908, 0.0f, 255.0f); - auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234, 0.0f, 255.0f); - auto bias = MakeRandomTensor<typename FullyConnectedBiasTypeForInputType<T>::Type, 1>(biasDesc, 1028, 0.0f, 255.0f); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::DepthwiseConvolution2dQueueDescriptor data; - armnn::WorkloadInfo info; - armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); - armnn::ScopedCpuTensorHandle biasTensor(biasDesc); - - AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); - AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); - - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - data.m_Weight = &weightsTensor; - data.m_Bias = &biasTensor; - data.m_Parameters.m_StrideX = strideX; - data.m_Parameters.m_StrideY = strideY; - data.m_Parameters.m_PadLeft = padX; - data.m_Parameters.m_PadRight = padX; - data.m_Parameters.m_PadTop = padY; - data.m_Parameters.m_PadBottom = padY; - data.m_Parameters.m_BiasEnabled = true; - - std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); - - armnn::DepthwiseConvolution2dQueueDescriptor refData = data; - armnn::WorkloadInfo refInfo = info; - SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); - SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info); - std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateDepthwiseConvolution2d(refData, refInfo); - - outputHandleRef->Allocate(); - inputHandleRef->Allocate(); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - refWorkloadFactory.Finalize(); - workloadRef->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); - - return ret; -} diff --git a/src/armnn/backends/test/ConvertFp16ToFp32TestImpl.hpp b/src/armnn/backends/test/ConvertFp16ToFp32TestImpl.hpp deleted file mode 100644 index b75879dea6..0000000000 --- a/src/armnn/backends/test/ConvertFp16ToFp32TestImpl.hpp +++ /dev/null @@ -1,55 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <armnn/ArmNN.hpp> -#include <armnn/Tensor.hpp> -#include <armnn/TypesUtils.hpp> - -#include <backends/WorkloadInfo.hpp> -#include <backends/CpuTensorHandle.hpp> - -#include <test/TensorHelpers.hpp> - -#include <Half.hpp> - -LayerTestResult<float, 4> SimpleConvertFp16ToFp32Test(armnn::IWorkloadFactory& workloadFactory) -{ - using namespace half_float::literal; - - const armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16); - const armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32); - - auto input = MakeTensor<armnn::Half, 4>(inputTensorInfo, - { -37.5_h, -15.2_h, -8.76_h, -2.0_h, -1.5_h, -1.3_h, -0.5_h, -0.4_h, 0.0_h, - 1.0_h, 0.4_h, 0.5_h, 1.3_h, 1.5_h, 2.0_h, 8.76_h, 15.2_h, 37.5_h }); - - LayerTestResult<float, 4> ret(outputTensorInfo); - ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, - { -37.5f, -15.2f, -8.76f, -2.0f, -1.5f, -1.3f, -0.5f, -0.4f, 0.0f, - 1.0f, 0.4f, 0.5f, 1.3f, 1.5f, 2.0f, 8.76f, 15.2f, 37.5f }); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::ConvertFp16ToFp32QueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvertFp16ToFp32(data, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - - return ret; -} diff --git a/src/armnn/backends/test/ConvertFp32ToFp16TestImpl.hpp b/src/armnn/backends/test/ConvertFp32ToFp16TestImpl.hpp deleted file mode 100644 index 1325b4b054..0000000000 --- a/src/armnn/backends/test/ConvertFp32ToFp16TestImpl.hpp +++ /dev/null @@ -1,55 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <armnn/ArmNN.hpp> -#include <armnn/Tensor.hpp> -#include <armnn/TypesUtils.hpp> - -#include <backends/WorkloadInfo.hpp> -#include <backends/CpuTensorHandle.hpp> - -#include <test/TensorHelpers.hpp> - -#include <Half.hpp> - -LayerTestResult<armnn::Half, 4> SimpleConvertFp32ToFp16Test(armnn::IWorkloadFactory& workloadFactory) -{ - using namespace half_float::literal; - - const armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32); - const armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16); - - auto input = MakeTensor<float, 4>(inputTensorInfo, - { -37.5f, -15.2f, -8.76f, -2.0f, -1.5f, -1.3f, -0.5f, -0.4f, 0.0f, - 1.0f, 0.4f, 0.5f, 1.3f, 1.5f, 2.0f, 8.76f, 15.2f, 37.5f }); - - LayerTestResult<armnn::Half, 4> ret(outputTensorInfo); - ret.outputExpected = MakeTensor<armnn::Half, 4>(outputTensorInfo, - { -37.5_h, -15.2_h, -8.76_h, -2.0_h, -1.5_h, -1.3_h, -0.5_h, -0.4_h, 0.0_h, - 1.0_h, 0.4_h, 0.5_h, 1.3_h, 1.5_h, 2.0_h, 8.76_h, 15.2_h, 37.5_h }); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::ConvertFp32ToFp16QueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvertFp32ToFp16(data, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - - return ret; -}
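Both conversion tests above exercise a single element-wise cast between IEEE FP32 and FP16. As a minimal standalone sketch of the property they rely on (FP16-to-FP32 widening is exact, and FP32 values that fit in half precision survive the round trip), one could write the following against the half_float library that Half.hpp wraps; the header name and the float conversions used here are assumptions, not part of the deleted sources:

#include <half.hpp>  // half_float::half, assumed to be the single-header library behind armnn's Half.hpp
#include <cassert>
#include <vector>

int main()
{
    // Like the test vectors above, every value here is exactly representable in FP16,
    // so the FP32 -> FP16 -> FP32 round trip loses nothing and the asserts hold.
    const std::vector<float> values = { -37.5f, -2.0f, -1.5f, -0.5f, 0.0f, 0.5f, 1.5f, 2.0f, 37.5f };
    for (const float f : values)
    {
        const half_float::half h(f);                 // narrowing cast: FP32 -> FP16
        const float widened = static_cast<float>(h); // widening cast: FP16 -> FP32, always exact
        assert(widened == f);
    }
    return 0;
}

Values such as 8.76f in the vectors above are not exactly representable in FP16, so an exact-equality check like the assert above would not be appropriate for them.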
\ No newline at end of file diff --git a/src/armnn/backends/test/CreateWorkloadCl.cpp b/src/armnn/backends/test/CreateWorkloadCl.cpp deleted file mode 100644 index af3192cae2..0000000000 --- a/src/armnn/backends/test/CreateWorkloadCl.cpp +++ /dev/null @@ -1,564 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#include "backends/ClWorkloadFactory.hpp" -#include "backends/RefWorkloadFactory.hpp" -#include "backends/MemCopyWorkload.hpp" -#include "backends/ClWorkloads/ClWorkloadUtils.hpp" -#include "backends/ClWorkloads.hpp" -#include "backends/ClTensorHandle.hpp" -#include "ClContextControlFixture.hpp" - -#include "test/CreateWorkloadClNeon.hpp" - -boost::test_tools::predicate_result CompareIClTensorHandleShape(IClTensorHandle* tensorHandle, - std::initializer_list<unsigned int> expectedDimensions) -{ - return CompareTensorHandleShape<IClTensorHandle>(tensorHandle, expectedDimensions); -} - -BOOST_FIXTURE_TEST_SUITE(CreateWorkloadCl, ClContextControlFixture) - -template <typename ActivationWorkloadType, armnn::DataType DataType> -static void ClCreateActivationWorkloadTest() -{ - Graph graph; - ClWorkloadFactory factory; - - auto workload = CreateActivationWorkloadTest<ActivationWorkloadType, DataType>(factory, graph); - - // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest). - ActivationQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1})); -} - -BOOST_AUTO_TEST_CASE(CreateActivationFloatWorkload) -{ - ClCreateActivationWorkloadTest<ClActivationFloatWorkload, armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload) -{ - ClCreateActivationWorkloadTest<ClActivationFloatWorkload, armnn::DataType::Float16>(); -} - -template <typename WorkloadType, - typename DescriptorType, - typename LayerType, - armnn::DataType DataType> -static void ClCreateArithmethicWorkloadTest() -{ - Graph graph; - ClWorkloadFactory factory; - auto workload = CreateArithmeticWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph); - - // Checks that inputs/outputs are as we expect them (see definition of CreateArithmeticWorkloadTest). 
- DescriptorType queueDescriptor = workload->GetData(); - auto inputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto inputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, {2, 3})); - BOOST_TEST(CompareIClTensorHandleShape(inputHandle2, {2, 3})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3})); -} - -BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload) -{ - ClCreateArithmethicWorkloadTest<ClAdditionWorkload<armnn::DataType::Float16, armnn::DataType::Float32>, - AdditionQueueDescriptor, - AdditionLayer, - armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload) -{ - ClCreateArithmethicWorkloadTest<ClAdditionWorkload<armnn::DataType::Float16, armnn::DataType::Float32>, - AdditionQueueDescriptor, - AdditionLayer, - armnn::DataType::Float16>(); -} - -BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload) -{ - ClCreateArithmethicWorkloadTest<ClSubtractionWorkload<armnn::DataType::Float16, armnn::DataType::Float32>, - SubtractionQueueDescriptor, - SubtractionLayer, - armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateSubtractionFloat16Workload) -{ - ClCreateArithmethicWorkloadTest<ClSubtractionWorkload<armnn::DataType::Float16, armnn::DataType::Float32>, - SubtractionQueueDescriptor, - SubtractionLayer, - armnn::DataType::Float16>(); -} - -BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkloadTest) -{ - ClCreateArithmethicWorkloadTest<ClMultiplicationFloatWorkload, - MultiplicationQueueDescriptor, - MultiplicationLayer, - armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16WorkloadTest) -{ - ClCreateArithmethicWorkloadTest<ClMultiplicationFloatWorkload, - MultiplicationQueueDescriptor, - MultiplicationLayer, - armnn::DataType::Float16>(); -} - -BOOST_AUTO_TEST_CASE(CreateDivisionFloatWorkloadTest) -{ - ClCreateArithmethicWorkloadTest<ClDivisionFloatWorkload, - DivisionQueueDescriptor, - DivisionLayer, - armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateDivisionFloat16WorkloadTest) -{ - ClCreateArithmethicWorkloadTest<ClDivisionFloatWorkload, - DivisionQueueDescriptor, - DivisionLayer, - armnn::DataType::Float16>(); -} - -template <typename BatchNormalizationWorkloadType, armnn::DataType DataType> -static void ClCreateBatchNormalizationWorkloadTest() -{ - Graph graph; - ClWorkloadFactory factory; - - auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType> - (factory, graph); - - // Checks that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest). 
- BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 1, 1})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3, 1, 1})); -} - -BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatWorkload) -{ - ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload, armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16Workload) -{ - ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload, armnn::DataType::Float16>(); -} - -BOOST_AUTO_TEST_CASE(CreateConvertFp16ToFp32Workload) -{ - Graph graph; - ClWorkloadFactory factory; - auto workload = CreateConvertFp16ToFp32WorkloadTest<ClConvertFp16ToFp32Workload>(factory, graph); - - ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 2, 3})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 2, 3})); - BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16)); - BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32)); -} - -BOOST_AUTO_TEST_CASE(CreateConvertFp32ToFp16Workload) -{ - Graph graph; - ClWorkloadFactory factory; - auto workload = CreateConvertFp32ToFp16WorkloadTest<ClConvertFp32ToFp16Workload>(factory, graph); - - ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 2, 3})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 2, 3})); - BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32)); - BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16)); -} - -template <typename Convolution2dWorkloadType, typename armnn::DataType DataType> -static void ClConvolution2dWorkloadTest() -{ - Graph graph; - ClWorkloadFactory factory; - auto workload = CreateConvolution2dWorkloadTest<Convolution2dWorkloadType, DataType> - (factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest). 
- Convolution2dQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 8, 16})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 2, 2, 10})); -} - -BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatWorkload) -{ - ClConvolution2dWorkloadTest<ClConvolution2dFloatWorkload, armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16Workload) -{ - ClConvolution2dWorkloadTest<ClConvolution2dFloatWorkload, armnn::DataType::Float16>(); -} - - -template <typename Convolution2dWorkloadType, typename armnn::DataType DataType> -static void ClDirectConvolution2dWorkloadTest() -{ - Graph graph; - ClWorkloadFactory factory; - auto workload = CreateDirectConvolution2dWorkloadTest<Convolution2dWorkloadType, DataType>( - factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest). - Convolution2dQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 6, 6})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 2, 6, 6})); -} - -BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloatWorkload) -{ - ClDirectConvolution2dWorkloadTest<ClConvolution2dFloatWorkload, armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloat16Workload) -{ - ClDirectConvolution2dWorkloadTest<ClConvolution2dFloatWorkload, armnn::DataType::Float16>(); -} - -BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dUint8Workload) -{ - ClDirectConvolution2dWorkloadTest<ClConvolution2dUint8Workload, armnn::DataType::QuantisedAsymm8>(); -} - -template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType> -static void ClCreateFullyConnectedWorkloadTest() -{ - Graph graph; - ClWorkloadFactory factory; - auto workload = - CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest). 
- FullyConnectedQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 1, 4, 5})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 7})); -} - - -BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloatWorkloadTest) -{ - ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16WorkloadTest) -{ - ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float16>(); -} - -template <typename NormalizationWorkloadType, typename armnn::DataType DataType> -static void ClNormalizationWorkloadTest() -{ - Graph graph; - ClWorkloadFactory factory; - - auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType> - (factory, graph); - - // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest). - NormalizationQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 5, 5, 1})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 5, 5, 1})); -} - -BOOST_AUTO_TEST_CASE(CreateNormalizationFloatWorkload) -{ - ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16Workload) -{ - ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(); -} - -template <typename Pooling2dWorkloadType, typename armnn::DataType DataType> -static void ClPooling2dWorkloadTest() -{ - Graph graph; - ClWorkloadFactory factory; - - auto workload = CreatePooling2dWorkloadTest<Pooling2dWorkloadType, DataType>(factory, graph); - - // Check that inputs/outputs are as we expect them (see definition of CreatePooling2dWorkloadTest). - Pooling2dQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 2, 5, 5})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 2, 2, 4})); -} - -BOOST_AUTO_TEST_CASE(CreatePooling2dFloatWorkload) -{ - ClPooling2dWorkloadTest<ClPooling2dFloatWorkload, armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16Workload) -{ - ClPooling2dWorkloadTest<ClPooling2dFloatWorkload, armnn::DataType::Float16>(); -} - -template <typename ReshapeWorkloadType, typename armnn::DataType DataType> -static void ClCreateReshapeWorkloadTest() -{ - Graph graph; - ClWorkloadFactory factory; - - auto workload = CreateReshapeWorkloadTest<ReshapeWorkloadType, DataType>(factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest). 
- ReshapeQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4})); // Leading size 1 dimensions are collapsed by ACL. -} - -BOOST_AUTO_TEST_CASE(CreateReshapeFloatWorkload) -{ - ClCreateReshapeWorkloadTest<ClReshapeFloatWorkload, armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload) -{ - ClCreateReshapeWorkloadTest<ClReshapeFloatWorkload, armnn::DataType::Float16>(); -} - -BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload) -{ - ClCreateReshapeWorkloadTest<ClReshapeUint8Workload, armnn::DataType::QuantisedAsymm8>(); -} - -template <typename SoftmaxWorkloadType, typename armnn::DataType DataType> -static void ClSoftmaxWorkloadTest() -{ - Graph graph; - ClWorkloadFactory factory; - - auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph); - - // Checks that inputs/outputs are as we expect them (see definition of ClSoftmaxFloatWorkload). - SoftmaxQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4, 1})); -} - - -BOOST_AUTO_TEST_CASE(CreateSoftmaxFloatWorkloadTest) -{ - ClSoftmaxWorkloadTest<ClSoftmaxFloatWorkload, armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16WorkloadTest) -{ - ClSoftmaxWorkloadTest<ClSoftmaxFloatWorkload, armnn::DataType::Float16>(); -} - -template <typename SplitterWorkloadType, typename armnn::DataType DataType> -static void ClSplitterWorkloadTest() -{ - Graph graph; - ClWorkloadFactory factory; - - auto workload = CreateSplitterWorkloadTest<SplitterWorkloadType, DataType>(factory, graph); - - // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest). - SplitterQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {5, 7, 7})); - - auto outputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle1, {2, 7, 7})); - - auto outputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle2, {2, 7, 7})); - - auto outputHandle0 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - // NOTE: At the moment the CL collapses the tensor to a 2 dim when dimension zero = 1 - // we are raising this difference between the NEON and CL libs as an issue with the compute library team. 
- BOOST_TEST(CompareIClTensorHandleShape(outputHandle0, {7, 7}));
-}
-
-BOOST_AUTO_TEST_CASE(CreateSplitterFloatWorkload)
-{
- ClSplitterWorkloadTest<ClSplitterFloatWorkload, armnn::DataType::Float32>();
-}
-
-BOOST_AUTO_TEST_CASE(CreateSplitterFloat16Workload)
-{
- ClSplitterWorkloadTest<ClSplitterFloatWorkload, armnn::DataType::Float16>();
-}
-
-template <typename SplitterWorkloadType, typename MergerWorkloadType, typename armnn::DataType DataType>
-static void ClSplitterMergerTest()
-{
- // Tests that it is possible to decide which output of the splitter layer
- // should be linked to which input of the merger layer.
- // We test that it is possible to specify the 0th output
- // of the splitter to be the 1st input to the merger and the 1st output of the splitter to be the 0th input
- // of the merger.
-
- Graph graph;
- ClWorkloadFactory factory;
-
- auto workloads =
- CreateSplitterMergerWorkloadTest<SplitterWorkloadType, MergerWorkloadType, DataType>
- (factory, graph);
-
- auto wlSplitter = std::move(workloads.first);
- auto wlMerger = std::move(workloads.second);
-
- //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
- armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
- armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
- armnn::ClSubTensorHandle* mIn0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlMerger->GetData().m_Inputs[0]);
- armnn::ClSubTensorHandle* mIn1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlMerger->GetData().m_Inputs[1]);
-
- BOOST_TEST(sOut0);
- BOOST_TEST(sOut1);
- BOOST_TEST(mIn0);
- BOOST_TEST(mIn1);
-
- //Flipped order of inputs/outputs.
- bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
- BOOST_TEST(validDataPointers);
-
-
- //Also make sure that the inputs are subtensors of one tensor and the outputs are subtensors of another tensor.
- bool validSubTensorParents = (mIn0->GetTensor().parent() == mIn1->GetTensor().parent())
- && (sOut0->GetTensor().parent() == sOut1->GetTensor().parent());
-
- BOOST_TEST(validSubTensorParents);
-}
-
-BOOST_AUTO_TEST_CASE(CreateSplitterMergerFloatWorkload)
-{
- ClSplitterMergerTest<ClSplitterFloatWorkload, ClMergerFloatWorkload, armnn::DataType::Float32>();
-}
-
-BOOST_AUTO_TEST_CASE(CreateSplitterMergerFloat16Workload)
-{
- ClSplitterMergerTest<ClSplitterFloatWorkload, ClMergerFloatWorkload, armnn::DataType::Float16>();
-}
-
-
-BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs)
-{
- // Test that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
- // We create a splitter with two outputs, and check that each of those outputs is used by two different activation layers.
-
- Graph graph;
- ClWorkloadFactory factory;
- std::unique_ptr<ClSplitterFloatWorkload> wlSplitter;
- std::unique_ptr<ClActivationFloatWorkload> wlActiv0_0;
- std::unique_ptr<ClActivationFloatWorkload> wlActiv0_1;
- std::unique_ptr<ClActivationFloatWorkload> wlActiv1_0;
- std::unique_ptr<ClActivationFloatWorkload> wlActiv1_1;
-
- CreateSplitterMultipleInputsOneOutputWorkloadTest<ClSplitterFloatWorkload,
- ClActivationFloatWorkload, armnn::DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
- wlActiv1_0, wlActiv1_1);
-
- //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
- armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]); - armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]); - armnn::ClSubTensorHandle* activ0_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]); - armnn::ClSubTensorHandle* activ0_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]); - armnn::ClSubTensorHandle* activ1_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]); - armnn::ClSubTensorHandle* activ1_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]); - - - BOOST_TEST(sOut0); - BOOST_TEST(sOut1); - BOOST_TEST(activ0_0Im); - BOOST_TEST(activ0_1Im); - BOOST_TEST(activ1_0Im); - BOOST_TEST(activ1_1Im); - - bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) && - (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im); - - BOOST_TEST(validDataPointers); -} - -BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsCl) -{ - ClWorkloadFactory factory; - CreateMemCopyWorkloads<IClTensorHandle>(factory); -} - -BOOST_AUTO_TEST_CASE(CreateL2NormalizationWorkload) -{ - Graph graph; - ClWorkloadFactory factory; - - auto workload = CreateL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32> - (factory, graph); - - // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest). - L2NormalizationQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 5, 20, 50, 67 })); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 5, 20, 50, 67 })); -} - -template <typename LstmWorkloadType> -static void ClCreateLstmWorkloadTest() -{ - Graph graph; - ClWorkloadFactory factory; - auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph); - - LstmQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]); - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 2 })); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4 })); -} - -BOOST_AUTO_TEST_CASE(CreateLSTMWorkloadFloatWorkload) -{ - ClCreateLstmWorkloadTest<ClLstmFloatWorkload>(); -} - - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/CreateWorkloadNeon.cpp b/src/armnn/backends/test/CreateWorkloadNeon.cpp deleted file mode 100644 index fbe064e1c4..0000000000 --- a/src/armnn/backends/test/CreateWorkloadNeon.cpp +++ /dev/null @@ -1,455 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#include "backends/NeonWorkloadFactory.hpp" -#include "backends/NeonWorkloadUtils.hpp" -#include "backends/NeonWorkloads.hpp" -#include "backends/MemCopyWorkload.hpp" -#include "backends/NeonTensorHandle.hpp" - -#include "test/CreateWorkloadClNeon.hpp" - -BOOST_AUTO_TEST_SUITE(CreateWorkloadNeon) - -namespace -{ - -bool TestNeonTensorHandleInfo(armnn::INeonTensorHandle* handle, const armnn::TensorInfo& expectedInfo) -{ - using namespace armnn::armcomputetensorutils; - - const arm_compute::ITensorInfo* handleInfo = handle->GetTensor().info(); - const arm_compute::TensorInfo expectedAclInfo = BuildArmComputeTensorInfo(expectedInfo); - - if (handleInfo->data_type() != expectedAclInfo.data_type()) - { - return false; - } - - if (handleInfo->num_dimensions() != expectedAclInfo.num_dimensions()) - { - return false; - } - - if (handleInfo->quantization_info() != expectedAclInfo.quantization_info()) - { - return false; - } - - for (std::size_t d = 0; d < expectedAclInfo.num_dimensions(); ++d) - { - if (handleInfo->dimension(d) != expectedAclInfo.dimension(d)) - { - return false; - } - } - - return true; -} - -} // namespace - -template <typename ActivationWorkloadType, typename armnn::DataType DataType> -static void NeonCreateActivationWorkloadTest() -{ - Graph graph; - NeonWorkloadFactory factory; - auto workload = CreateActivationWorkloadTest<ActivationWorkloadType, DataType> - (factory, graph); - - // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest). - ActivationQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 1}, DataType))); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 1}, DataType))); -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload) -{ - NeonCreateActivationWorkloadTest<NeonActivationFloatWorkload, DataType::Float16>(); -} -#endif - -BOOST_AUTO_TEST_CASE(CreateActivationFloatWorkload) -{ - NeonCreateActivationWorkloadTest<NeonActivationFloatWorkload, DataType::Float32>(); -} - -template <typename WorkloadType, - typename DescriptorType, - typename LayerType, - armnn::DataType DataType> -static void NeonCreateArithmethicWorkloadTest() -{ - Graph graph; - NeonWorkloadFactory factory; - auto workload = CreateArithmeticWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph); - - DescriptorType queueDescriptor = workload->GetData(); - auto inputHandle1 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto inputHandle2 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[1]); - auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType))); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType))); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType))); -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload) -{ - NeonCreateArithmethicWorkloadTest<NeonAdditionFloatWorkload, - AdditionQueueDescriptor, - AdditionLayer, - DataType::Float16>(); -} -#endif - 
-BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload) -{ - NeonCreateArithmethicWorkloadTest<NeonAdditionFloatWorkload, - AdditionQueueDescriptor, - AdditionLayer, - DataType::Float32>(); -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -BOOST_AUTO_TEST_CASE(CreateSubtractionFloat16Workload) -{ - NeonCreateArithmethicWorkloadTest<NeonSubtractionFloatWorkload, - SubtractionQueueDescriptor, - SubtractionLayer, - DataType::Float16>(); -} -#endif - -BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload) -{ - NeonCreateArithmethicWorkloadTest<NeonSubtractionFloatWorkload, - SubtractionQueueDescriptor, - SubtractionLayer, - DataType::Float32>(); -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16Workload) -{ - NeonCreateArithmethicWorkloadTest<NeonMultiplicationFloatWorkload, - MultiplicationQueueDescriptor, - MultiplicationLayer, - DataType::Float16>(); -} -#endif - -BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkload) -{ - NeonCreateArithmethicWorkloadTest<NeonMultiplicationFloatWorkload, - MultiplicationQueueDescriptor, - MultiplicationLayer, - DataType::Float32>(); -} - -template <typename BatchNormalizationWorkloadType, typename armnn::DataType DataType> -static void NeonCreateBatchNormalizationWorkloadTest() -{ - Graph graph; - NeonWorkloadFactory factory; - auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>(factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest). - BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({2, 3, 1, 1}, DataType))); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3, 1, 1}, DataType))); -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16Workload) -{ - NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationFloatWorkload, DataType::Float16>(); -} -#endif - -BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatWorkload) -{ - NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationFloatWorkload, DataType::Float32>(); -} - -template <typename Convolution2dWorkloadType, typename armnn::DataType DataType> -static void NeonCreateConvolution2dWorkloadTest() -{ - Graph graph; - NeonWorkloadFactory factory; - auto workload = CreateConvolution2dWorkloadTest<Convolution2dWorkloadType, - DataType>(factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest). 
- Convolution2dQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({2, 3, 8, 16}, DataType))); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 2, 2, 10}, DataType))); -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16Workload) -{ - NeonCreateConvolution2dWorkloadTest<NeonConvolution2dFloatWorkload, DataType::Float16>(); -} -#endif - -BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatWorkload) -{ - NeonCreateConvolution2dWorkloadTest<NeonConvolution2dFloatWorkload, DataType::Float32>(); -} - -template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType> -static void NeonCreateFullyConnectedWorkloadTest() -{ - Graph graph; - NeonWorkloadFactory factory; - auto workload = CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, - DataType>(factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest). - FullyConnectedQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 1, 4, 5}, DataType))); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 7}, DataType))); -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16Workload) -{ - NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedFloatWorkload, DataType::Float16>(); -} -#endif - -BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloatWorkload) -{ - NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedFloatWorkload, DataType::Float32>(); -} - -template <typename NormalizationWorkloadType, typename armnn::DataType DataType> -static void NeonCreateNormalizationWorkloadTest() -{ - Graph graph; - NeonWorkloadFactory factory; - auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest). 
- NormalizationQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 5, 5, 1}, DataType))); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 5, 5, 1}, DataType))); -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16Workload) -{ - NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float16>(); -} -#endif - -BOOST_AUTO_TEST_CASE(CreateNormalizationFloatWorkload) -{ - NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float32>(); -} - -template <typename Pooling2dWorkloadType, typename armnn::DataType DataType> -static void NeonCreatePooling2dWorkloadTest() -{ - Graph graph; - NeonWorkloadFactory factory; - auto workload = CreatePooling2dWorkloadTest<Pooling2dWorkloadType, DataType> - (factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest). - Pooling2dQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 2, 5, 5}, DataType))); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 2, 2, 4}, DataType))); -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16Workload) -{ - NeonCreatePooling2dWorkloadTest<NeonPooling2dFloatWorkload, DataType::Float16>(); -} -#endif - -BOOST_AUTO_TEST_CASE(CreatePooling2dFloatWorkload) -{ - NeonCreatePooling2dWorkloadTest<NeonPooling2dFloatWorkload, DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreatePooling2dUint8Workload) -{ - NeonCreatePooling2dWorkloadTest<NeonPooling2dUint8Workload, DataType::QuantisedAsymm8>(); -} - -template <typename ReshapeWorkloadType, typename armnn::DataType DataType> -static void NeonCreateReshapeWorkloadTest() -{ - Graph graph; - NeonWorkloadFactory factory; - auto workload = CreateReshapeWorkloadTest<ReshapeWorkloadType, DataType>(factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest). 
- ReshapeQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType))); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 4}, DataType))); -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload) -{ - NeonCreateReshapeWorkloadTest<NeonReshapeFloatWorkload, DataType::Float16>(); -} -#endif - -BOOST_AUTO_TEST_CASE(CreateReshapeFloatWorkload) -{ - NeonCreateReshapeWorkloadTest<NeonReshapeFloatWorkload, DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload) -{ - NeonCreateReshapeWorkloadTest<NeonReshapeUint8Workload, DataType::QuantisedAsymm8>(); -} - -template <typename SoftmaxWorkloadType, typename armnn::DataType DataType> -static void NeonCreateSoftmaxWorkloadTest() -{ - Graph graph; - NeonWorkloadFactory factory; - auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest). - SoftmaxQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType))); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({4, 1}, DataType))); -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16Workload) -{ - NeonCreateSoftmaxWorkloadTest<NeonSoftmaxFloatWorkload, DataType::Float16>(); -} -#endif - -BOOST_AUTO_TEST_CASE(CreateSoftmaxFloatWorkload) -{ - NeonCreateSoftmaxWorkloadTest<NeonSoftmaxFloatWorkload, DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateSplitterWorkload) -{ - Graph graph; - NeonWorkloadFactory factory; - auto workload = CreateSplitterWorkloadTest<NeonSplitterFloatWorkload, DataType::Float32>(factory, graph); - - // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest). - SplitterQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({5, 7, 7}, DataType::Float32))); - - auto outputHandle0 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle0, TensorInfo({1, 7, 7}, DataType::Float32))); - - auto outputHandle1 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[1]); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle1, TensorInfo({2, 7, 7}, DataType::Float32))); - - auto outputHandle2 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[2]); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle2, TensorInfo({2, 7, 7}, DataType::Float32))); -} - -BOOST_AUTO_TEST_CASE(CreateSplitterMerger) -{ - // Tests that it is possible to decide which output of the splitter layer - // should be lined to which input of the merger layer. 
- // We tested that is is possible to specify 0th output - // of the splitter to be the 1st input to the merger, and the 1st output of the splitter to be 0th input - // of the merger. - - Graph graph; - NeonWorkloadFactory factory; - - auto workloads = - CreateSplitterMergerWorkloadTest<NeonSplitterFloatWorkload, NeonMergerFloatWorkload, - DataType::Float32>(factory, graph); - - auto wlSplitter = std::move(workloads.first); - auto wlMerger = std::move(workloads.second); - - //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction. - armnn::INeonTensorHandle* sOut0 = dynamic_cast<armnn::INeonTensorHandle*>(wlSplitter->GetData().m_Outputs[0]); - armnn::INeonTensorHandle* sOut1 = dynamic_cast<armnn::INeonTensorHandle*>(wlSplitter->GetData().m_Outputs[1]); - armnn::INeonTensorHandle* mIn0 = dynamic_cast<armnn::INeonTensorHandle*>(wlMerger->GetData().m_Inputs[0]); - armnn::INeonTensorHandle* mIn1 = dynamic_cast<armnn::INeonTensorHandle*>(wlMerger->GetData().m_Inputs[1]); - - BOOST_TEST(sOut0); - BOOST_TEST(sOut1); - BOOST_TEST(mIn0); - BOOST_TEST(mIn1); - - bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0); - - BOOST_TEST(validDataPointers); -} - -BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs) -{ - // Tests that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer. - // We created a splitter with two outputs. That each of those outputs is used by two different activation layers - - Graph graph; - NeonWorkloadFactory factory; - std::unique_ptr<NeonSplitterFloatWorkload> wlSplitter; - std::unique_ptr<NeonActivationFloatWorkload> wlActiv0_0; - std::unique_ptr<NeonActivationFloatWorkload> wlActiv0_1; - std::unique_ptr<NeonActivationFloatWorkload> wlActiv1_0; - std::unique_ptr<NeonActivationFloatWorkload> wlActiv1_1; - - CreateSplitterMultipleInputsOneOutputWorkloadTest<NeonSplitterFloatWorkload, - NeonActivationFloatWorkload, DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1, - wlActiv1_0, wlActiv1_1); - - armnn::INeonTensorHandle* sOut0 = dynamic_cast<armnn::INeonTensorHandle*>(wlSplitter->GetData().m_Outputs[0]); - armnn::INeonTensorHandle* sOut1 = dynamic_cast<armnn::INeonTensorHandle*>(wlSplitter->GetData().m_Outputs[1]); - armnn::INeonTensorHandle* activ0_0Im = dynamic_cast<armnn::INeonTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]); - armnn::INeonTensorHandle* activ0_1Im = dynamic_cast<armnn::INeonTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]); - armnn::INeonTensorHandle* activ1_0Im = dynamic_cast<armnn::INeonTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]); - armnn::INeonTensorHandle* activ1_1Im = dynamic_cast<armnn::INeonTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]); - - - BOOST_TEST(sOut0); - BOOST_TEST(sOut1); - BOOST_TEST(activ0_0Im); - BOOST_TEST(activ0_1Im); - BOOST_TEST(activ1_0Im); - BOOST_TEST(activ1_1Im); - - bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) && - (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im); - - BOOST_TEST(validDataPointers); -} - -BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsNeon) -{ - NeonWorkloadFactory factory; - CreateMemCopyWorkloads<INeonTensorHandle>(factory); -} - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/CreateWorkloadRef.cpp b/src/armnn/backends/test/CreateWorkloadRef.cpp deleted file mode 100644 index 41419dafd0..0000000000 --- a/src/armnn/backends/test/CreateWorkloadRef.cpp +++ /dev/null @@ -1,478 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#include "backends/RefWorkloadFactory.hpp" -#include "backends/RefWorkloads.hpp" -#include "backends/CpuTensorHandle.hpp" - -#include "test/CreateWorkload.hpp" - -namespace -{ - -template<typename Workload> -void CheckInputOutput(std::unique_ptr<Workload> workload, const TensorInfo& inputInfo, const TensorInfo& outputInfo) -{ - auto queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<ConstCpuTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST((inputHandle->GetTensorInfo() == inputInfo)); - BOOST_TEST((outputHandle->GetTensorInfo() == outputInfo)); -} - -template <typename Workload> -void CheckInputsOutput(std::unique_ptr<Workload> workload, - const TensorInfo& inputInfo0, - const TensorInfo& inputInfo1, - const TensorInfo& outputInfo) -{ - auto queueDescriptor = workload->GetData(); - auto inputHandle0 = boost::polymorphic_downcast<ConstCpuTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto inputHandle1 = boost::polymorphic_downcast<ConstCpuTensorHandle*>(queueDescriptor.m_Inputs[1]); - auto outputHandle = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST((inputHandle0->GetTensorInfo() == inputInfo0)); - BOOST_TEST((inputHandle1->GetTensorInfo() == inputInfo1)); - BOOST_TEST((outputHandle->GetTensorInfo() == outputInfo)); -} -} - -BOOST_AUTO_TEST_SUITE(CreateWorkloadRef) - -template <typename ActivationWorkloadType, armnn::DataType DataType> -static void RefCreateActivationWorkloadTest() -{ - Graph graph; - RefWorkloadFactory factory; - auto workload = CreateActivationWorkloadTest<ActivationWorkloadType, DataType>(factory, graph); - - // Checks that outputs are as we expect them (see definition of CreateActivationWorkloadTest). 
- CheckInputOutput(std::move(workload), - TensorInfo({ 1, 1 }, DataType), - TensorInfo({ 1, 1 }, DataType)); -} - -BOOST_AUTO_TEST_CASE(CreateActivationFloat32Workload) -{ - RefCreateActivationWorkloadTest<RefActivationFloat32Workload, armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateActivationUint8Workload) -{ - RefCreateActivationWorkloadTest<RefActivationUint8Workload, armnn::DataType::QuantisedAsymm8>(); -} - -template <typename WorkloadType, - typename DescriptorType, - typename LayerType, - armnn::DataType DataType> -static void RefCreateArithmethicWorkloadTest() -{ - Graph graph; - RefWorkloadFactory factory; - auto workload = CreateArithmeticWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph); - - CheckInputsOutput(std::move(workload), - TensorInfo({ 2, 3 }, DataType), - TensorInfo({ 2, 3 }, DataType), - TensorInfo({ 2, 3 }, DataType)); -} - -BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload) -{ - RefCreateArithmethicWorkloadTest<RefAdditionFloat32Workload, - AdditionQueueDescriptor, - AdditionLayer, - armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateAdditionUint8Workload) -{ - RefCreateArithmethicWorkloadTest<RefAdditionUint8Workload, - AdditionQueueDescriptor, - AdditionLayer, - armnn::DataType::QuantisedAsymm8>(); -} - -BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload) -{ - RefCreateArithmethicWorkloadTest<RefSubtractionFloat32Workload, - SubtractionQueueDescriptor, - SubtractionLayer, - armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateSubtractionUint8Workload) -{ - RefCreateArithmethicWorkloadTest<RefSubtractionUint8Workload, - SubtractionQueueDescriptor, - SubtractionLayer, - armnn::DataType::QuantisedAsymm8>(); -} - -BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkload) -{ - RefCreateArithmethicWorkloadTest<RefMultiplicationFloat32Workload, - MultiplicationQueueDescriptor, - MultiplicationLayer, - armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateMultiplicationUint8Workload) -{ - RefCreateArithmethicWorkloadTest<RefMultiplicationUint8Workload, - MultiplicationQueueDescriptor, - MultiplicationLayer, - armnn::DataType::QuantisedAsymm8>(); -} - -BOOST_AUTO_TEST_CASE(CreateDivisionFloatWorkload) -{ - RefCreateArithmethicWorkloadTest<RefDivisionFloat32Workload, - DivisionQueueDescriptor, - DivisionLayer, - armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateDivisionUint8Workload) -{ - RefCreateArithmethicWorkloadTest<RefDivisionUint8Workload, - DivisionQueueDescriptor, - DivisionLayer, - armnn::DataType::QuantisedAsymm8>(); -} - -BOOST_AUTO_TEST_CASE(CreateBatchNormalizationWorkload) -{ - Graph graph; - RefWorkloadFactory factory; - auto workload = CreateBatchNormalizationWorkloadTest<RefBatchNormalizationFloat32Workload, armnn::DataType::Float32> - (factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest). 
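RefCreateArithmethicWorkloadTest above is the single point of reuse for all of the element-wise operations: each test case only supplies the workload, queue-descriptor and layer types plus the data type. Covering a further element-wise operation would be one more instantiation along the same lines; the Maximum names in this sketch are hypothetical and do not exist in this file:

// Hypothetical instantiation, assuming RefMaximumFloat32Workload,
// MaximumQueueDescriptor and MaximumLayer types were available.
BOOST_AUTO_TEST_CASE(CreateMaximumFloatWorkload)
{
    RefCreateArithmethicWorkloadTest<RefMaximumFloat32Workload,
                                     MaximumQueueDescriptor,
                                     MaximumLayer,
                                     armnn::DataType::Float32>();
}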
- CheckInputOutput( - std::move(workload), TensorInfo({2, 3, 1, 1}, DataType::Float32), TensorInfo({2, 3, 1, 1}, DataType::Float32)); -} - -BOOST_AUTO_TEST_CASE(CreateConvertFp16ToFp32Float32Workload) -{ - Graph graph; - RefWorkloadFactory factory; - auto workload = CreateConvertFp16ToFp32WorkloadTest<RefConvertFp16ToFp32Workload>(factory, graph); - - // Checks that outputs and inputs are as we expect them - CheckInputOutput( - std::move(workload), TensorInfo({1, 3, 2, 3}, DataType::Float16), TensorInfo({1, 3, 2, 3}, DataType::Float32)); -} - -BOOST_AUTO_TEST_CASE(CreateConvertFp32ToFp16Float16Workload) -{ - Graph graph; - RefWorkloadFactory factory; - auto workload = CreateConvertFp32ToFp16WorkloadTest<RefConvertFp32ToFp16Workload>(factory, graph); - - // Checks that outputs and inputs are as we expect them - CheckInputOutput( - std::move(workload), TensorInfo({1, 3, 2, 3}, DataType::Float32), TensorInfo({1, 3, 2, 3}, DataType::Float16)); -} - -BOOST_AUTO_TEST_CASE(CreateConvolution2dWorkload) -{ - Graph graph; - RefWorkloadFactory factory; - auto workload = CreateConvolution2dWorkloadTest<RefConvolution2dFloat32Workload, - DataType::Float32>(factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest). - CheckInputOutput(std::move(workload), - TensorInfo({2, 3, 8, 16}, DataType::Float32), - TensorInfo({2, 2, 2, 10}, DataType::Float32)); -} - -BOOST_AUTO_TEST_CASE(CreateDepthwiseConvolution2dWorkload) -{ - Graph graph; - RefWorkloadFactory factory; - auto workload = - CreateDepthwiseConvolution2dWorkloadTest<RefDepthwiseConvolution2dFloat32Workload>(factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest). - CheckInputOutput(std::move(workload), - TensorInfo({2, 3, 8, 16}, DataType::Float32), - TensorInfo({2, 9, 2, 10}, DataType::Float32)); -} - -template <typename FullyConnectedWorkloadType, armnn::DataType DataType> -static void RefCreateFullyConnectedWorkloadTest() -{ - Graph graph; - RefWorkloadFactory factory; - auto workload = CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest). - float inputsQScale = DataType == armnn::DataType::QuantisedAsymm8 ? 1.0f : 0.0; - float outputQScale = DataType == armnn::DataType::QuantisedAsymm8 ? 2.0f : 0.0; - CheckInputOutput(std::move(workload), - TensorInfo({ 3, 1, 4, 5 }, DataType, inputsQScale), - TensorInfo({ 3, 7 }, DataType, outputQScale)); -} - -BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat32Workload) -{ - RefCreateFullyConnectedWorkloadTest<RefFullyConnectedFloat32Workload, armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateFullyConnectedUint8Workload) -{ - RefCreateFullyConnectedWorkloadTest<RefFullyConnectedUint8Workload, armnn::DataType::QuantisedAsymm8>(); -} - -BOOST_AUTO_TEST_CASE(CreateNormalizationWorkload) -{ - Graph graph; - RefWorkloadFactory factory; - auto workload = CreateNormalizationWorkloadTest<RefNormalizationFloat32Workload, - armnn::DataType::Float32>(factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest). 
- CheckInputOutput(std::move(workload), - TensorInfo({3, 5, 5, 1}, DataType::Float32), - TensorInfo({3, 5, 5, 1}, DataType::Float32)); -} - -template <typename Pooling2dWorkloadType, armnn::DataType DataType> -static void RefCreatePooling2dWorkloadTest() -{ - Graph graph; - RefWorkloadFactory factory; - auto workload = CreatePooling2dWorkloadTest<Pooling2dWorkloadType, DataType>(factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest). - CheckInputOutput( - std::move(workload), - TensorInfo({3, 2, 5, 5}, DataType), - TensorInfo({3, 2, 2, 4}, DataType)); -} - -BOOST_AUTO_TEST_CASE(CreatePooling2dFloat32Workload) -{ - RefCreatePooling2dWorkloadTest<RefPooling2dFloat32Workload, armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreatePooling2dUint8Workload) -{ - RefCreatePooling2dWorkloadTest<RefPooling2dUint8Workload, armnn::DataType::QuantisedAsymm8>(); -} - -template <typename SoftmaxWorkloadType, armnn::DataType DataType> -static void RefCreateSoftmaxWorkloadTest() -{ - Graph graph; - RefWorkloadFactory factory; - auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest). - CheckInputOutput( - std::move(workload), - TensorInfo({4, 1}, DataType), - TensorInfo({4, 1}, DataType)); -} - -BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat32Workload) -{ - RefCreateSoftmaxWorkloadTest<RefSoftmaxFloat32Workload, armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateSoftmaxUint8Workload) -{ - RefCreateSoftmaxWorkloadTest<RefSoftmaxUint8Workload, armnn::DataType::QuantisedAsymm8>(); -} - -template <typename SplitterWorkloadType, armnn::DataType DataType> -static void RefCreateSplitterWorkloadTest() -{ - Graph graph; - RefWorkloadFactory factory; - auto workload = CreateSplitterWorkloadTest<SplitterWorkloadType, DataType>(factory, graph); - - // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest). - SplitterQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<ConstCpuTensorHandle*>(queueDescriptor.m_Inputs[0]); - BOOST_TEST((inputHandle->GetTensorInfo() == TensorInfo({ 5, 7, 7 }, DataType))); - - auto outputHandle0 = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST((outputHandle0->GetTensorInfo() == TensorInfo({ 1, 7, 7 }, DataType))); - - auto outputHandle1 = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor.m_Outputs[1]); - BOOST_TEST((outputHandle1->GetTensorInfo() == TensorInfo({ 2, 7, 7 }, DataType))); - - auto outputHandle2 = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor.m_Outputs[2]); - BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({ 2, 7, 7 }, DataType))); -} - -BOOST_AUTO_TEST_CASE(CreateSplitterFloat32Workload) -{ - RefCreateSplitterWorkloadTest<RefSplitterFloat32Workload, armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateSplitterUint8Workload) -{ - RefCreateSplitterWorkloadTest<RefSplitterUint8Workload, armnn::DataType::QuantisedAsymm8>(); -} - -template <typename SplitterWorkloadType, typename MergerWorkloadType, armnn::DataType DataType> -static void RefCreateSplitterMergerWorkloadTest() -{ - // Tests that it is possible to decide which output of the splitter layer - // should be lined to which input of the merger layer. 
- // We tested that is is possible to specify 0th output - // of the splitter to be the 1st input to the merger and the 1st output of the splitter to be 0th input - // of the merger. - - Graph graph; - RefWorkloadFactory factory; - auto workloads = CreateSplitterMergerWorkloadTest<SplitterWorkloadType, MergerWorkloadType, DataType> - (factory, graph); - - auto wlSplitter = std::move(workloads.first); - auto wlMerger = std::move(workloads.second); - - //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction. - armnn::CpuTensorHandle* sOut0 = dynamic_cast<armnn::CpuTensorHandle*>(wlSplitter->GetData().m_Outputs[0]); - armnn::CpuTensorHandle* sOut1 = dynamic_cast<armnn::CpuTensorHandle*>(wlSplitter->GetData().m_Outputs[1]); - armnn::CpuTensorHandle* mIn0 = dynamic_cast<armnn::CpuTensorHandle*>(wlMerger->GetData().m_Inputs[0]); - armnn::CpuTensorHandle* mIn1 = dynamic_cast<armnn::CpuTensorHandle*>(wlMerger->GetData().m_Inputs[1]); - - BOOST_TEST(sOut0); - BOOST_TEST(sOut1); - BOOST_TEST(mIn0); - BOOST_TEST(mIn1); - - bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0); - - BOOST_TEST(validDataPointers); -} - -BOOST_AUTO_TEST_CASE(CreateSplitterMergerFloat32) -{ - RefCreateSplitterMergerWorkloadTest<RefSplitterFloat32Workload, RefMergerFloat32Workload, DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateSplitterMergerUint8) -{ - RefCreateSplitterMergerWorkloadTest<RefSplitterUint8Workload, RefMergerUint8Workload, DataType::QuantisedAsymm8>(); -} - -template <typename SplitterWorkloadType, typename ActivationWorkloadType, armnn::DataType DataType> -static void RefCreateSingleOutputMultipleInputsTest() -{ - // Tests that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer. - // We created a splitter with two outputs. That each of those outputs is used by two different activation layers. 
- - Graph graph; - RefWorkloadFactory factory; - std::unique_ptr<SplitterWorkloadType> wlSplitter; - std::unique_ptr<ActivationWorkloadType> wlActiv0_0; - std::unique_ptr<ActivationWorkloadType> wlActiv0_1; - std::unique_ptr<ActivationWorkloadType> wlActiv1_0; - std::unique_ptr<ActivationWorkloadType> wlActiv1_1; - - CreateSplitterMultipleInputsOneOutputWorkloadTest<SplitterWorkloadType, - ActivationWorkloadType, DataType>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1, wlActiv1_0, wlActiv1_1); - - armnn::CpuTensorHandle* sOut0 = dynamic_cast<armnn::CpuTensorHandle*>(wlSplitter->GetData().m_Outputs[0]); - armnn::CpuTensorHandle* sOut1 = dynamic_cast<armnn::CpuTensorHandle*>(wlSplitter->GetData().m_Outputs[1]); - armnn::CpuTensorHandle* activ0_0Im = dynamic_cast<armnn::CpuTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]); - armnn::CpuTensorHandle* activ0_1Im = dynamic_cast<armnn::CpuTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]); - armnn::CpuTensorHandle* activ1_0Im = dynamic_cast<armnn::CpuTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]); - armnn::CpuTensorHandle* activ1_1Im = dynamic_cast<armnn::CpuTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]); - - - BOOST_TEST(sOut0); - BOOST_TEST(sOut1); - BOOST_TEST(activ0_0Im); - BOOST_TEST(activ0_1Im); - BOOST_TEST(activ1_0Im); - BOOST_TEST(activ1_1Im); - - bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) && - (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im); - - BOOST_TEST(validDataPointers); -} - -BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputsFloat32) -{ - RefCreateSingleOutputMultipleInputsTest<RefSplitterFloat32Workload, RefActivationFloat32Workload, - armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputsUint8) -{ - RefCreateSingleOutputMultipleInputsTest<RefSplitterUint8Workload, RefActivationUint8Workload, - armnn::DataType::QuantisedAsymm8>(); -} - -template <typename ResizeBilinearWorkloadType, armnn::DataType DataType> -static void RefCreateResizeBilinearTest() -{ - Graph graph; - RefWorkloadFactory factory; - auto workload = CreateResizeBilinearWorkloadTest<ResizeBilinearWorkloadType, DataType>(factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateResizeBilinearWorkloadTest). - CheckInputOutput( - std::move(workload), - TensorInfo({ 2, 3, 4, 4 }, DataType), - TensorInfo({ 2, 3, 2, 2 }, DataType)); -} - -BOOST_AUTO_TEST_CASE(CreateResizeBilinearFloat32) -{ - RefCreateResizeBilinearTest<RefResizeBilinearFloat32Workload, armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateResizeBilinearUint8) -{ - RefCreateResizeBilinearTest<RefResizeBilinearUint8Workload, armnn::DataType::QuantisedAsymm8>(); -} - -BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat32) -{ - Graph graph; - RefWorkloadFactory factory; - auto workload = CreateL2NormalizationWorkloadTest<RefL2NormalizationFloat32Workload, armnn::DataType::Float32> - (factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateL2NormalizationWorkloadTest). 
- CheckInputOutput( - std::move(workload), - TensorInfo({ 5, 20, 50, 67 }, armnn::DataType::Float32), - TensorInfo({ 5, 20, 50, 67 }, armnn::DataType::Float32)); -} - -template <typename ReshapeWorkloadType, armnn::DataType DataType> -static void RefCreateReshapeWorkloadTest() -{ - Graph graph; - RefWorkloadFactory factory; - auto workload = CreateReshapeWorkloadTest<ReshapeWorkloadType, DataType>(factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest). - CheckInputOutput( - std::move(workload), - TensorInfo({ 4, 1 }, DataType), - TensorInfo({ 1, 4 }, DataType)); -} - -BOOST_AUTO_TEST_CASE(CreateReshapeFloat32Workload) -{ - RefCreateReshapeWorkloadTest<RefReshapeFloat32Workload, armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload) -{ - RefCreateReshapeWorkloadTest<RefReshapeUint8Workload, armnn::DataType::QuantisedAsymm8>(); -} - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/FullyConnectedTestImpl.hpp b/src/armnn/backends/test/FullyConnectedTestImpl.hpp deleted file mode 100644 index 125b7e62b1..0000000000 --- a/src/armnn/backends/test/FullyConnectedTestImpl.hpp +++ /dev/null @@ -1,287 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -template<typename T, typename B> -LayerTestResult<T, 2> SimpleFullyConnectedTestImpl( - armnn::IWorkloadFactory& workloadFactory, - armnn::TensorInfo inputTensorInfo, - armnn::TensorInfo outputTensorInfo, - armnn::TensorInfo weightsDesc, - armnn::TensorInfo biasesDesc, - boost::multi_array<T, 2>& weights, - boost::multi_array<B, 1>& bias, - boost::multi_array<T, 4>& input, - bool biasEnabled, - bool transposeWeights) -{ - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::FullyConnectedQueueDescriptor data; - armnn::WorkloadInfo info; - armnn::ScopedCpuTensorHandle weightsTensor(weightsDesc); - armnn::ScopedCpuTensorHandle biasTensor(biasesDesc); - - AllocateAndCopyDataToITensorHandle(&weightsTensor, &weights[0][0]); - AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); - - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - data.m_Weight = &weightsTensor; - data.m_Bias = &biasTensor; - data.m_Parameters.m_BiasEnabled = biasEnabled; - data.m_Parameters.m_TransposeWeightMatrix = transposeWeights; - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateFullyConnected(data, info); - LayerTestResult<T, 2> result(outputTensorInfo); - - inputHandle->Allocate(); - outputHandle->Allocate(); - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0], outputHandle.get()); - - return result; -} - -LayerTestResult<float, 2> FullyConnectedFloat32Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled, - bool transposeWeights) -{ - unsigned int inputWidth = 1; - unsigned int inputHeight = 1; - unsigned int inputChannels = 5; - unsigned int inputNum = 2; - - unsigned int outputChannels = 3; - unsigned int outputNum = 2; - - // Define the tensor descriptors. 
- armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - armnn::TensorInfo weightsDesc; - armnn::TensorInfo biasesDesc; - - unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; - unsigned int outputShape[] = { outputNum, outputChannels }; - unsigned int weightsShape[] = { inputChannels, outputChannels }; - if (transposeWeights) - { - std::swap(weightsShape[0], weightsShape[1]); - } - unsigned int biasShape[] = { outputChannels }; - - inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); - outputTensorInfo = armnn::TensorInfo(2, outputShape, armnn::DataType::Float32); - weightsDesc = armnn::TensorInfo(2, weightsShape, armnn::DataType::Float32); - biasesDesc = armnn::TensorInfo(1, biasShape, armnn::DataType::Float32); - - LayerTestResult<float, 2> result(outputTensorInfo); - - boost::multi_array<float, 4> input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>( - { - 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, - - 5.0f, 4.0f, 3.0f, 2.0f, 1.0f - }) - ); - - boost::multi_array<float, 2> weights = MakeTensor<float, 2>(weightsDesc, std::vector<float>( - { - .5f, 2.f, .5f, - .5f, 2.f, 1.f, - .5f, 2.f, 2.f, - .5f, 2.f, 3.f, - .5f, 2.f, 4.f - })); - - if (transposeWeights) - { - weights = MakeTensor<float, 2>(weightsDesc, std::vector<float>( - { - .5f, .5f, .5f, .5f, .5f, - 2.f, 2.f, 2.f, 2.f, 2.f, - .5f, 1.f, 2.f, 3.f, 4.f - })); - } - - - std::vector<float> biasValues({0.f, 0.f, 0.f}); - if (biasEnabled) - { - biasValues = std::vector<float>({10.f, 20.f, 30.f}); - } - boost::multi_array<float, 1> bias = MakeTensor<float, 1>(biasesDesc, biasValues); - - result = SimpleFullyConnectedTestImpl<float>( - workloadFactory, - inputTensorInfo, outputTensorInfo, - weightsDesc, biasesDesc, - weights, bias, input, - biasEnabled, transposeWeights - ); - - result.outputExpected = MakeTensor<float, 2>(outputTensorInfo, std::vector<float>( - { - 0.5f + 1.0f + 1.5f + 2.0f + 2.5f + biasValues[0], - 2.0f + 4.0f + 6.0f + 8.0f + 10.f + biasValues[1], - 0.5f + 2.0f + 6.0f + 12.f + 20.f + biasValues[2], - - 2.5f + 2.0f + 1.5f + 1.0f + 0.5f + biasValues[0], - 10.0f + 8.0f + 6.0f + 4.0f + 2.f + biasValues[1], - 2.5f + 4.0f + 6.0f + 6.f + 4.f + biasValues[2] - }) - ); - - return result; -} - -LayerTestResult<uint8_t, 2> FullyConnectedUint8Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled) -{ - constexpr static unsigned int inputWidth = 3u; - constexpr static unsigned int inputHeight = 2u; - constexpr static unsigned int inputChannels = 1u; - - constexpr static unsigned int inputSize = inputWidth * inputHeight * inputChannels; - - constexpr static unsigned int outputChannels = 2u; - - armnn::TensorInfo inputTensorInfo({ 1, inputChannels, inputHeight, inputWidth }, armnn::DataType::QuantisedAsymm8); - inputTensorInfo.SetQuantizationScale(0.1f); - inputTensorInfo.SetQuantizationOffset(63); - - armnn::TensorInfo outputTensorInfo({ 1, outputChannels }, armnn::DataType::QuantisedAsymm8); - outputTensorInfo.SetQuantizationScale(5.f); - outputTensorInfo.SetQuantizationOffset(biasEnabled ? 
-50 : 10); - - armnn::TensorInfo weightsDesc({ outputChannels, inputSize }, armnn::DataType::QuantisedAsymm8); - weightsDesc.SetQuantizationScale(0.2f); - weightsDesc.SetQuantizationOffset(93); - - armnn::TensorInfo biasesDesc({ outputChannels }, armnn::DataType::Signed32); - biasesDesc.SetQuantizationScale(inputTensorInfo.GetQuantizationScale() * weightsDesc.GetQuantizationScale()); - biasesDesc.SetQuantizationOffset(0); - - LayerTestResult<uint8_t, 2> result(outputTensorInfo); - - auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>{51, 124, 28, - 251, 8, 92}); - - auto weights = MakeTensor<uint8_t, 2>(weightsDesc, std::vector<uint8_t>{51, 193, 42, 53, 175, 34, - 210, 145, 23, 74, 34, 150}); - - // scale = 0.02 - // offset = 0 - auto bias = MakeTensor<int32_t, 1>(biasesDesc, std::vector<int32_t>{9250, 67500}); - - result = SimpleFullyConnectedTestImpl<uint8_t>( - workloadFactory, - inputTensorInfo, outputTensorInfo, - weightsDesc, biasesDesc, - weights, bias, input, - biasEnabled, true - ); - - // Manually calculated. - // Note one of these values has been clamped to 0. - if (biasEnabled) - { - result.outputExpected = MakeTensor<uint8_t, 2>(outputTensorInfo, std::vector<uint8_t>{0, 242}); - } - else - { - result.outputExpected = MakeTensor<uint8_t, 2>(outputTensorInfo, std::vector<uint8_t>{0, 32}); - } - - return result; -} - - - -// -// ArmNN variant of the AndroidNN fully_connected_float_large test. -// -// Tests the fully connected layer with large values, optionally transposing weights. -// Note this is templated for consistency, but the nature of this tests makes it unlikely to be useful in Uint8 mode. -// -template<typename T> -LayerTestResult<T, 2> FullyConnectedLargeTestCommon(armnn::IWorkloadFactory& workloadFactory, - bool transposeWeights, - float qScale = 0.0f, - int32_t qOffset = 0) -{ - unsigned int inputWidth = 1; - unsigned int inputHeight = 1; - unsigned int inputChannels = 5; - unsigned int inputNum = 1; - - unsigned int outputChannels = 1; - unsigned int outputNum = 1; - - // Define the tensor descriptors. - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - armnn::TensorInfo weightsDesc; - armnn::TensorInfo biasesDesc; - - unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; - unsigned int outputShape[] = { outputNum, outputChannels }; - unsigned int weightsShape[] = { inputChannels, outputChannels }; - if (transposeWeights) - { - std::swap(weightsShape[0], weightsShape[1]); - } - - unsigned int biasShape[] = { outputChannels }; - - inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType<T>()); - outputTensorInfo = armnn::TensorInfo(2, outputShape, armnn::GetDataType<T>()); - weightsDesc = armnn::TensorInfo(2, weightsShape, armnn::GetDataType<T>()); - biasesDesc = armnn::TensorInfo(1, biasShape, armnn::GetDataType<T>()); - - // Set quantization parameters if the requested type is a quantized type. 
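The quantisation parameters chosen in FullyConnectedUint8Test above follow the usual convention for integer bias tensors: the bias is Signed32 with offset 0 and a scale equal to the product of the input and weight scales, so each int32 value dequantises directly. Working through the values used in that test:

// Bias quantisation convention used by FullyConnectedUint8Test above.
constexpr float inputScale  = 0.1f;
constexpr float weightScale = 0.2f;
constexpr float biasScale   = inputScale * weightScale;   // 0.02f, as noted in the test
// Dequantised bias values:  9250 * 0.02f = 185.0f,  67500 * 0.02f = 1350.0f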
- if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - } - - LayerTestResult<T, 2> result(outputTensorInfo); - - boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 1.0f, 10.0f, 100.0f, 1000.0f, 10000.0f, - }) - ); - - boost::multi_array<T, 2> weights = MakeTensor<T, 2>(weightsDesc, - QuantizedVector<T>(qScale, qOffset, { - 2.0f, 3.0f, 4.0f, 5.0f, 6.0f - }) - ); - - std::vector<T> biasValues({900000.f}); - boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasesDesc, biasValues); - - result = SimpleFullyConnectedTestImpl<T>( - workloadFactory, - inputTensorInfo, outputTensorInfo, - weightsDesc, biasesDesc, - weights, bias, input, - true, transposeWeights - ); - - result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 965432.0f, - }) - ); - - return result; -} diff --git a/src/armnn/backends/test/IsLayerSupportedTest.cpp b/src/armnn/backends/test/IsLayerSupportedTest.cpp deleted file mode 100644 index 97d3de5e38..0000000000 --- a/src/armnn/backends/test/IsLayerSupportedTest.cpp +++ /dev/null @@ -1,239 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#include <boost/test/unit_test.hpp> - -#include "test/TensorHelpers.hpp" -#include "LayerTests.hpp" - -#include "backends/CpuTensorHandle.hpp" -#include "backends/RefWorkloadFactory.hpp" - -#include <string> -#include <iostream> -#include <backends/ClWorkloadFactory.hpp> -#include <backends/NeonWorkloadFactory.hpp> - -#include "IsLayerSupportedTestImpl.hpp" -#include "ClContextControlFixture.hpp" - -#include "layers/ConvertFp16ToFp32Layer.hpp" -#include "layers/ConvertFp32ToFp16Layer.hpp" - -BOOST_AUTO_TEST_SUITE(IsLayerSupported) - -BOOST_AUTO_TEST_CASE(IsLayerSupportedLayerTypeMatches) -{ - LayerTypeMatchesTest(); -} - -BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat16Reference) -{ - armnn::RefWorkloadFactory factory; - IsLayerSupportedTests<armnn::RefWorkloadFactory, armnn::DataType::Float16>(&factory); -} - -BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat32Reference) -{ - armnn::RefWorkloadFactory factory; - IsLayerSupportedTests<armnn::RefWorkloadFactory, armnn::DataType::Float32>(&factory); -} - -BOOST_AUTO_TEST_CASE(IsLayerSupportedUint8Reference) -{ - armnn::RefWorkloadFactory factory; - IsLayerSupportedTests<armnn::RefWorkloadFactory, armnn::DataType::QuantisedAsymm8>(&factory); -} - -BOOST_AUTO_TEST_CASE(IsConvertFp16ToFp32SupportedReference) -{ - std::string reasonIfUnsupported; - - bool result = IsConvertLayerSupportedTests<armnn::RefWorkloadFactory, armnn::ConvertFp16ToFp32Layer, - armnn::DataType::Float16, armnn::DataType::Float32>(reasonIfUnsupported); - - BOOST_CHECK(result); -} - -BOOST_AUTO_TEST_CASE(IsConvertFp16ToFp32SupportedFp32InputReference) -{ - std::string reasonIfUnsupported; - - bool result = IsConvertLayerSupportedTests<armnn::RefWorkloadFactory, armnn::ConvertFp16ToFp32Layer, - armnn::DataType::Float32, armnn::DataType::Float32>(reasonIfUnsupported); - - BOOST_CHECK(!result); - BOOST_CHECK_EQUAL(reasonIfUnsupported, "Layer is not supported with float32 data type input"); -} - -BOOST_AUTO_TEST_CASE(IsConvertFp16ToFp32SupportedFp16OutputReference) -{ - std::string reasonIfUnsupported; - - bool result = IsConvertLayerSupportedTests<armnn::RefWorkloadFactory, armnn::ConvertFp16ToFp32Layer, - 
armnn::DataType::Float16, armnn::DataType::Float16>(reasonIfUnsupported); - - BOOST_CHECK(!result); - BOOST_CHECK_EQUAL(reasonIfUnsupported, "Layer is not supported with float16 data type output"); -} - -BOOST_AUTO_TEST_CASE(IsConvertFp32ToFp16SupportedReference) -{ - std::string reasonIfUnsupported; - - bool result = IsConvertLayerSupportedTests<armnn::RefWorkloadFactory, armnn::ConvertFp32ToFp16Layer, - armnn::DataType::Float32, armnn::DataType::Float16>(reasonIfUnsupported); - - BOOST_CHECK(result); -} - -BOOST_AUTO_TEST_CASE(IsConvertFp32ToFp16SupportedFp16InputReference) -{ - std::string reasonIfUnsupported; - - bool result = IsConvertLayerSupportedTests<armnn::RefWorkloadFactory, armnn::ConvertFp32ToFp16Layer, - armnn::DataType::Float16, armnn::DataType::Float16>(reasonIfUnsupported); - - BOOST_CHECK(!result); - BOOST_CHECK_EQUAL(reasonIfUnsupported, "Layer is not supported with float16 data type input"); -} - -BOOST_AUTO_TEST_CASE(IsConvertFp32ToFp16SupportedFp32OutputReference) -{ - std::string reasonIfUnsupported; - - bool result = IsConvertLayerSupportedTests<armnn::RefWorkloadFactory, armnn::ConvertFp32ToFp16Layer, - armnn::DataType::Float32, armnn::DataType::Float32>(reasonIfUnsupported); - - BOOST_CHECK(!result); - BOOST_CHECK_EQUAL(reasonIfUnsupported, "Layer is not supported with float32 data type output"); -} - -#ifdef ARMCOMPUTENEON_ENABLED -BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat16Neon) -{ - armnn::NeonWorkloadFactory factory; - IsLayerSupportedTests<armnn::NeonWorkloadFactory, armnn::DataType::Float16>(&factory); -} - -BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat32Neon) -{ - armnn::NeonWorkloadFactory factory; - IsLayerSupportedTests<armnn::NeonWorkloadFactory, armnn::DataType::Float32>(&factory); -} - -BOOST_AUTO_TEST_CASE(IsLayerSupportedUint8Neon) -{ - armnn::NeonWorkloadFactory factory; - IsLayerSupportedTests<armnn::NeonWorkloadFactory, armnn::DataType::QuantisedAsymm8>(&factory); -} - -BOOST_AUTO_TEST_CASE(IsConvertFp16ToFp32SupportedNeon) -{ - std::string reasonIfUnsupported; - - bool result = IsConvertLayerSupportedTests<armnn::NeonWorkloadFactory, armnn::ConvertFp16ToFp32Layer, - armnn::DataType::Float16, armnn::DataType::Float32>(reasonIfUnsupported); - - BOOST_CHECK(result); -} - -BOOST_AUTO_TEST_CASE(IsConvertFp32ToFp16SupportedNeon) -{ - std::string reasonIfUnsupported; - - bool result = IsConvertLayerSupportedTests<armnn::NeonWorkloadFactory, armnn::ConvertFp32ToFp16Layer, - armnn::DataType::Float32, armnn::DataType::Float16>(reasonIfUnsupported); - - BOOST_CHECK(result); -} -#endif //#ifdef ARMCOMPUTENEON_ENABLED. 
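The NEON cases sit inside the ARMCOMPUTENEON_ENABLED guard, and the CL cases below sit inside ARMCOMPUTECL_ENABLED, so builds without the corresponding Arm Compute backend still compile and link. Any additional backend-specific case would go inside the same guard; the test name in this sketch is hypothetical:

#ifdef ARMCOMPUTENEON_ENABLED
// Hypothetical extra case, reusing the existing helper and factory type.
BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat32NeonAgain)
{
    armnn::NeonWorkloadFactory factory;
    IsLayerSupportedTests<armnn::NeonWorkloadFactory, armnn::DataType::Float32>(&factory);
}
#endif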
- - -#ifdef ARMCOMPUTECL_ENABLED - -BOOST_FIXTURE_TEST_CASE(IsLayerSupportedFloat16Cl, ClContextControlFixture) -{ - armnn::ClWorkloadFactory factory; - IsLayerSupportedTests<armnn::ClWorkloadFactory, armnn::DataType::Float16>(&factory); -} - -BOOST_FIXTURE_TEST_CASE(IsLayerSupportedFloat32Cl, ClContextControlFixture) -{ - armnn::ClWorkloadFactory factory; - IsLayerSupportedTests<armnn::ClWorkloadFactory, armnn::DataType::Float32>(&factory); -} - -BOOST_FIXTURE_TEST_CASE(IsLayerSupportedUint8Cl, ClContextControlFixture) -{ - armnn::ClWorkloadFactory factory; - IsLayerSupportedTests<armnn::ClWorkloadFactory, armnn::DataType::QuantisedAsymm8>(&factory); -} - -BOOST_FIXTURE_TEST_CASE(IsConvertFp16ToFp32SupportedCl, ClContextControlFixture) -{ - std::string reasonIfUnsupported; - - bool result = IsConvertLayerSupportedTests<armnn::ClWorkloadFactory, armnn::ConvertFp16ToFp32Layer, - armnn::DataType::Float16, armnn::DataType::Float32>(reasonIfUnsupported); - - BOOST_CHECK(result); -} - -BOOST_FIXTURE_TEST_CASE(IsConvertFp16ToFp32SupportedFp32InputCl, ClContextControlFixture) -{ - std::string reasonIfUnsupported; - - bool result = IsConvertLayerSupportedTests<armnn::ClWorkloadFactory, armnn::ConvertFp16ToFp32Layer, - armnn::DataType::Float32, armnn::DataType::Float32>(reasonIfUnsupported); - - BOOST_CHECK(!result); - BOOST_CHECK_EQUAL(reasonIfUnsupported, "Input should be Float16"); -} - -BOOST_FIXTURE_TEST_CASE(IsConvertFp16ToFp32SupportedFp16OutputCl, ClContextControlFixture) -{ - std::string reasonIfUnsupported; - - bool result = IsConvertLayerSupportedTests<armnn::ClWorkloadFactory, armnn::ConvertFp16ToFp32Layer, - armnn::DataType::Float16, armnn::DataType::Float16>(reasonIfUnsupported); - - BOOST_CHECK(!result); - BOOST_CHECK_EQUAL(reasonIfUnsupported, "Output should be Float32"); -} - -BOOST_FIXTURE_TEST_CASE(IsConvertFp32ToFp16SupportedCl, ClContextControlFixture) -{ - std::string reasonIfUnsupported; - - bool result = IsConvertLayerSupportedTests<armnn::ClWorkloadFactory, armnn::ConvertFp32ToFp16Layer, - armnn::DataType::Float32, armnn::DataType::Float16>(reasonIfUnsupported); - - BOOST_CHECK(result); -} - -BOOST_FIXTURE_TEST_CASE(IsConvertFp32ToFp16SupportedFp16InputCl, ClContextControlFixture) -{ - std::string reasonIfUnsupported; - - bool result = IsConvertLayerSupportedTests<armnn::ClWorkloadFactory, armnn::ConvertFp32ToFp16Layer, - armnn::DataType::Float16, armnn::DataType::Float16>(reasonIfUnsupported); - - BOOST_CHECK(!result); - BOOST_CHECK_EQUAL(reasonIfUnsupported, "Input should be Float32"); -} - -BOOST_FIXTURE_TEST_CASE(IsConvertFp32ToFp16SupportedFp32OutputCl, ClContextControlFixture) -{ - std::string reasonIfUnsupported; - - bool result = IsConvertLayerSupportedTests<armnn::ClWorkloadFactory, armnn::ConvertFp32ToFp16Layer, - armnn::DataType::Float32, armnn::DataType::Float32>(reasonIfUnsupported); - - BOOST_CHECK(!result); - BOOST_CHECK_EQUAL(reasonIfUnsupported, "Output should be Float16"); -} -#endif //#ifdef ARMCOMPUTECL_ENABLED. - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/IsLayerSupportedTestImpl.hpp b/src/armnn/backends/test/IsLayerSupportedTestImpl.hpp deleted file mode 100644 index c5389df06e..0000000000 --- a/src/armnn/backends/test/IsLayerSupportedTestImpl.hpp +++ /dev/null @@ -1,565 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include "Graph.hpp" - -#include <boost/core/ignore_unused.hpp> - -namespace -{ -armnn::Graph dummyGraph; - -// Make a dummy TensorInfo object. 
-template<armnn::DataType DataType> -armnn::TensorInfo MakeDummyTensorInfo() -{ - return armnn::TensorInfo({2,2,2,2}, DataType); -} - - -// Make a dummy WorkloadInfo using a dummy TensorInfo. -template<armnn::DataType DataType> -armnn::WorkloadInfo MakeDummyWorkloadInfo(unsigned int numInputs, unsigned int numOutputs) -{ - armnn::WorkloadInfo info; - for (unsigned int i=0; i < numInputs; i++) - { - info.m_InputTensorInfos.push_back(MakeDummyTensorInfo<DataType>()); - } - for (unsigned int o=0; o < numOutputs; o++) - { - info.m_OutputTensorInfos.push_back(MakeDummyTensorInfo<DataType>()); - } - return info; -} - -// Template class to create a dummy layer (2 parameters). -template<typename LayerType, typename DescType = typename LayerType::DescriptorType> -struct DummyLayer -{ - DummyLayer() - { - m_Layer = dummyGraph.AddLayer<LayerType>(DescType(), ""); - } - ~DummyLayer() - { - dummyGraph.EraseLayer(m_Layer); - } - LayerType* m_Layer; -}; - -// Template class to create a dummy layer (1 parameter). -template<typename LayerType> -struct DummyLayer<LayerType, void> -{ - DummyLayer() - { - m_Layer = dummyGraph.AddLayer<LayerType>(""); - } - ~DummyLayer() - { - dummyGraph.EraseLayer(m_Layer); - } - LayerType* m_Layer; -}; - -template<> -struct DummyLayer<armnn::BatchNormalizationLayer> -{ - DummyLayer() - { - m_Layer = dummyGraph.AddLayer<armnn::BatchNormalizationLayer>(armnn::BatchNormalizationDescriptor(), ""); - m_Layer->m_Mean = std::make_unique<armnn::ScopedCpuTensorHandle>( - armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); - m_Layer->m_Variance = std::make_unique<armnn::ScopedCpuTensorHandle>( - armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); - m_Layer->m_Beta = std::make_unique<armnn::ScopedCpuTensorHandle>( - armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); - m_Layer->m_Gamma = std::make_unique<armnn::ScopedCpuTensorHandle>( - armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); - } - ~DummyLayer() - { - dummyGraph.EraseLayer(m_Layer); - } - armnn::BatchNormalizationLayer* m_Layer; - -}; - -template<> -struct DummyLayer<armnn::ConstantLayer, void> -{ - DummyLayer() - { - m_Layer = dummyGraph.AddLayer<armnn::ConstantLayer>(""); - } - ~DummyLayer() - { - dummyGraph.EraseLayer(m_Layer); - } - armnn::ConstantLayer* m_Layer; -}; - -template<> -struct DummyLayer<armnn::InputLayer, armnn::LayerBindingId> -{ - DummyLayer() - { - m_Layer = dummyGraph.AddLayer<armnn::InputLayer>(armnn::LayerBindingId(), ""); - - } - ~DummyLayer() - { - dummyGraph.EraseLayer(m_Layer); - } - armnn::InputLayer* m_Layer; -}; - -template<> -struct DummyLayer<armnn::MergerLayer> -{ - DummyLayer() - { - armnn::OriginsDescriptor desc(2); - m_Layer = dummyGraph.AddLayer<armnn::MergerLayer>(desc, ""); - - } - ~DummyLayer() - { - dummyGraph.EraseLayer(m_Layer); - } - armnn::MergerLayer* m_Layer; -}; - -template<> -struct DummyLayer<armnn::OutputLayer, armnn::LayerBindingId> -{ - DummyLayer() - { - m_Layer = dummyGraph.AddLayer<armnn::OutputLayer>(armnn::LayerBindingId(), ""); - - } - ~DummyLayer() - { - dummyGraph.EraseLayer(m_Layer); - } - armnn::OutputLayer* m_Layer; -}; - -template<> -struct DummyLayer<armnn::SplitterLayer> -{ - DummyLayer() - { - armnn::ViewsDescriptor desc(1); - m_Layer = dummyGraph.AddLayer<armnn::SplitterLayer>(desc, ""); - - } - ~DummyLayer() - { - dummyGraph.EraseLayer(m_Layer); - } - armnn::SplitterLayer* m_Layer; -}; - -template <typename ConvolutionLayerType> -struct 
DummyConvolutionLayer -{ - DummyConvolutionLayer() - { - typename ConvolutionLayerType::DescriptorType desc; - m_Layer = dummyGraph.AddLayer<ConvolutionLayerType>(desc, ""); - m_Layer->m_Weight = std::make_unique<armnn::ScopedCpuTensorHandle>( - armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); - m_Layer->m_Bias = std::make_unique<armnn::ScopedCpuTensorHandle>( - armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); - } - ~DummyConvolutionLayer() - { - dummyGraph.EraseLayer(m_Layer); - } - ConvolutionLayerType* m_Layer; -}; - -template<> -struct DummyLayer<armnn::Convolution2dLayer> - : public DummyConvolutionLayer<armnn::Convolution2dLayer> -{ -}; - -template<> -struct DummyLayer<armnn::DepthwiseConvolution2dLayer> - : public DummyConvolutionLayer<armnn::DepthwiseConvolution2dLayer> -{ -}; - -template <typename LstmLayerType> -struct DummyLstmLayer -{ - DummyLstmLayer() - { - typename LstmLayerType::DescriptorType desc; - desc.m_CifgEnabled = false; - - m_Layer = dummyGraph.AddLayer<LstmLayerType>(armnn::LstmDescriptor(), ""); - m_Layer->m_BasicParameters.m_InputToForgetWeights = std::make_unique<armnn::ScopedCpuTensorHandle>( - armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); - m_Layer->m_BasicParameters.m_InputToCellWeights = std::make_unique<armnn::ScopedCpuTensorHandle>( - armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); - m_Layer->m_BasicParameters.m_InputToOutputWeights = std::make_unique<armnn::ScopedCpuTensorHandle>( - armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); - m_Layer->m_BasicParameters.m_RecurrentToForgetWeights = std::make_unique<armnn::ScopedCpuTensorHandle>( - armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); - m_Layer->m_BasicParameters.m_RecurrentToCellWeights = std::make_unique<armnn::ScopedCpuTensorHandle>( - armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); - m_Layer->m_BasicParameters.m_RecurrentToOutputWeights = std::make_unique<armnn::ScopedCpuTensorHandle>( - armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); - m_Layer->m_BasicParameters.m_ForgetGateBias = std::make_unique<armnn::ScopedCpuTensorHandle>( - armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); - m_Layer->m_BasicParameters.m_CellBias = std::make_unique<armnn::ScopedCpuTensorHandle>( - armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); - m_Layer->m_BasicParameters.m_OutputGateBias = std::make_unique<armnn::ScopedCpuTensorHandle>( - armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); - - m_Layer->m_CifgParameters.m_InputToInputWeights = std::make_unique<armnn::ScopedCpuTensorHandle>( - armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); - m_Layer->m_CifgParameters.m_RecurrentToInputWeights = std::make_unique<armnn::ScopedCpuTensorHandle>( - armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); - m_Layer->m_CifgParameters.m_CellToInputWeights = std::make_unique<armnn::ScopedCpuTensorHandle>( - armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); - m_Layer->m_CifgParameters.m_InputGateBias = std::make_unique<armnn::ScopedCpuTensorHandle>( - armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); - } - ~DummyLstmLayer() - { - dummyGraph.EraseLayer(m_Layer); - } - armnn::LstmLayer* m_Layer; -}; - -template<> -struct 
DummyLayer<armnn::LstmLayer> - : public DummyLstmLayer<armnn::LstmLayer> -{ -}; - -template<> -struct DummyLayer<armnn::FullyConnectedLayer> -{ - DummyLayer() - { - armnn::FullyConnectedLayer::DescriptorType desc; - m_Layer = dummyGraph.AddLayer<armnn::FullyConnectedLayer>(desc, ""); - m_Layer->m_Weight = std::make_unique<armnn::ScopedCpuTensorHandle>( - armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); - } - ~DummyLayer() - { - dummyGraph.EraseLayer(m_Layer); - } - armnn::FullyConnectedLayer* m_Layer; -}; - -// Tag for giving LayerType entries a unique strong type each. -template<armnn::LayerType> -struct Tag{}; - -#define DECLARE_LAYER_POLICY_CUSTOM_PARAM(name, descType) \ -template<armnn::DataType DataType> \ -struct LayerTypePolicy<armnn::LayerType::name, DataType> \ -{ \ - using Type = armnn::name##Layer; \ - using Desc = descType; \ - using QueueDesc = armnn::name##QueueDescriptor; \ - constexpr static const char* NameStr = #name; \ - \ - static std::unique_ptr<armnn::IWorkload> MakeDummyWorkload(armnn::IWorkloadFactory *factory, \ - unsigned int nIn, unsigned int nOut) \ - { \ - QueueDesc desc; \ - armnn::WorkloadInfo info = MakeDummyWorkloadInfo<DataType>(nIn, nOut); \ - return factory->Create##name(desc, info); \ - } \ -}; - -// Define a layer policy specialization for use with the IsLayerSupported tests. -// Use this version for layers whose constructor takes 1 parameter(name). -#define DECLARE_LAYER_POLICY_1_PARAM(name) DECLARE_LAYER_POLICY_CUSTOM_PARAM(name, void) - -// Define a layer policy specialization for use with the IsLayerSupported tests. -// Use this version for layers whose constructor takes 2 parameters(descriptor and name). -#define DECLARE_LAYER_POLICY_2_PARAM(name) DECLARE_LAYER_POLICY_CUSTOM_PARAM(name, armnn::name##Descriptor) - -// Layer policy template. -template<armnn::LayerType Type, armnn::DataType DataType> -struct LayerTypePolicy; - -// Every entry in the armnn::LayerType enum must be accounted for below. 
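Each DECLARE_LAYER_POLICY_* invocation that follows is a thin wrapper around DECLARE_LAYER_POLICY_CUSTOM_PARAM above, so expanding one of them by hand makes the generated policy easier to read. For DECLARE_LAYER_POLICY_2_PARAM(Activation) the expansion is roughly:

template<armnn::DataType DataType>
struct LayerTypePolicy<armnn::LayerType::Activation, DataType>
{
    using Type = armnn::ActivationLayer;
    using Desc = armnn::ActivationDescriptor;
    using QueueDesc = armnn::ActivationQueueDescriptor;
    constexpr static const char* NameStr = "Activation";

    // Builds a dummy workload through the factory so the test can compare
    // IsLayerSupported() against what the factory actually creates.
    static std::unique_ptr<armnn::IWorkload> MakeDummyWorkload(armnn::IWorkloadFactory* factory,
                                                               unsigned int nIn, unsigned int nOut)
    {
        QueueDesc desc;
        armnn::WorkloadInfo info = MakeDummyWorkloadInfo<DataType>(nIn, nOut);
        return factory->CreateActivation(desc, info);
    }
};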
-DECLARE_LAYER_POLICY_2_PARAM(Activation) - -DECLARE_LAYER_POLICY_1_PARAM(Addition) - -DECLARE_LAYER_POLICY_2_PARAM(BatchNormalization) - -DECLARE_LAYER_POLICY_1_PARAM(Constant) - -DECLARE_LAYER_POLICY_1_PARAM(ConvertFp16ToFp32) - -DECLARE_LAYER_POLICY_1_PARAM(ConvertFp32ToFp16) - -DECLARE_LAYER_POLICY_2_PARAM(Convolution2d) - -DECLARE_LAYER_POLICY_1_PARAM(MemCopy) - -DECLARE_LAYER_POLICY_2_PARAM(DepthwiseConvolution2d) - -DECLARE_LAYER_POLICY_2_PARAM(FakeQuantization) - -DECLARE_LAYER_POLICY_1_PARAM(Floor) - -DECLARE_LAYER_POLICY_2_PARAM(FullyConnected) - -DECLARE_LAYER_POLICY_CUSTOM_PARAM(Input, armnn::LayerBindingId) - -DECLARE_LAYER_POLICY_1_PARAM(L2Normalization) - -DECLARE_LAYER_POLICY_2_PARAM(Lstm) - -DECLARE_LAYER_POLICY_2_PARAM(Mean) - -DECLARE_LAYER_POLICY_2_PARAM(Merger) - -DECLARE_LAYER_POLICY_1_PARAM(Multiplication) - -DECLARE_LAYER_POLICY_2_PARAM(Normalization) - -DECLARE_LAYER_POLICY_CUSTOM_PARAM(Output, armnn::LayerBindingId) - -DECLARE_LAYER_POLICY_2_PARAM(Permute) - -DECLARE_LAYER_POLICY_2_PARAM(Pooling2d) - -DECLARE_LAYER_POLICY_1_PARAM(Division) - -DECLARE_LAYER_POLICY_2_PARAM(ResizeBilinear) - -DECLARE_LAYER_POLICY_2_PARAM(Reshape) - -DECLARE_LAYER_POLICY_2_PARAM(Softmax) - -DECLARE_LAYER_POLICY_2_PARAM(Splitter) - -DECLARE_LAYER_POLICY_1_PARAM(Subtraction) - - -// Generic implementation to get the number of input slots for a given layer type; -template<armnn::LayerType Type> -unsigned int GetNumInputs(const armnn::Layer& layer) -{ - return layer.GetNumInputSlots(); -} - -// Generic implementation to get the number of output slots for a given layer type; -template<armnn::LayerType Type> -unsigned int GetNumOutputs(const armnn::Layer& layer) -{ - return layer.GetNumOutputSlots(); -} - -template<> -unsigned int GetNumInputs<armnn::LayerType::Merger>(const armnn::Layer& layer) -{ - boost::ignore_unused(layer); - return 2; -} - -// Tests that the IsLayerSupported() function returns the correct value. -// We determined the correct value by *trying* to create the relevant workload and seeing if it matches what we expect. -// Returns true if expectations are met, otherwise returns false. -template<typename FactoryType, armnn::DataType DataType, armnn::LayerType Type> -bool IsLayerSupportedTest(FactoryType *factory, Tag<Type>) -{ - using LayerPolicy = LayerTypePolicy<Type, DataType>; - using LayerType = typename LayerPolicy::Type; - using LayerDesc = typename LayerPolicy::Desc; - DummyLayer<LayerType, LayerDesc> layer; - - unsigned int numIn = GetNumInputs<Type>(*layer.m_Layer); - unsigned int numOut = GetNumOutputs<Type>(*layer.m_Layer); - - // Make another dummy layer just to make IsLayerSupported have valid inputs. - DummyLayer<armnn::ConstantLayer, void> previousLayer; - // Set output of the previous layer to a dummy tensor. - armnn::TensorInfo output = MakeDummyTensorInfo<DataType>(); - previousLayer.m_Layer->GetOutputSlot(0).SetTensorInfo(output); - // Connect all outputs of the previous layer to inputs of tested layer. - for (unsigned int i = 0; i < numIn; i++) - { - armnn::IOutputSlot& previousLayerOutputSlot = previousLayer.m_Layer->GetOutputSlot(0); - armnn::IInputSlot& layerInputSlot = layer.m_Layer->GetInputSlot(i); - previousLayerOutputSlot.Connect(layerInputSlot); - } - // Set outputs of tested layer to a dummy tensor. 
- for (unsigned int i = 0; i < numOut; i++) - { - layer.m_Layer->GetOutputSlot(0).SetTensorInfo(output); - } - - std::string layerName = LayerPolicy::NameStr; - std::string reasonIfUnsupported; - if (FactoryType::IsLayerSupported(*layer.m_Layer, DataType, reasonIfUnsupported)) - { - std::string errorMsg = " layer expected support but found none."; - try - { - bool retVal = LayerPolicy::MakeDummyWorkload(factory, numIn, numOut).get() != nullptr; - // hacky way (it has to be replaced): for Lstm, we only support F32 right now -// BOOST_CHECK_MESSAGE(retVal, layerName << errorMsg); - return retVal; - } - catch(const armnn::InvalidArgumentException& e) - { - boost::ignore_unused(e); - // This is ok since we throw InvalidArgumentException when creating the dummy workload. - return true; - } - catch(const std::exception& e) - { - errorMsg = e.what(); - BOOST_TEST_ERROR(layerName << ": " << errorMsg); - return false; - } - catch(...) - { - errorMsg = "Unexpected error while testing support for "; - BOOST_TEST_ERROR(errorMsg << layerName); - return false; - } - } - else - { - std::string errorMsg = "layer expected no support (giving reason: " + reasonIfUnsupported + ") but found some."; - try - { - bool retVal = LayerPolicy::MakeDummyWorkload(factory, numIn, numOut).get() == nullptr; - BOOST_CHECK_MESSAGE(retVal, layerName << errorMsg); - return retVal; - } - // These two exceptions are ok: For workloads that are partially supported, attempting to instantiate them - // using parameters that make IsLayerSupported() return false should throw an - // InvalidArgumentException or UnimplementedException. - catch(const armnn::InvalidArgumentException& e) - { - boost::ignore_unused(e); - return true; - } - catch(const armnn::UnimplementedException& e) - { - boost::ignore_unused(e); - return true; - } - catch(const std::exception& e) - { - errorMsg = e.what(); - BOOST_TEST_ERROR(layerName << ": " << errorMsg); - return false; - } - catch(...) - { - errorMsg = "Unexpected error while testing support for "; - BOOST_TEST_ERROR(errorMsg << layerName); - return false; - } - } -} - -// Helper function to compute the next type in the LayerType enum. -constexpr armnn::LayerType NextType(armnn::LayerType type) -{ - return static_cast<armnn::LayerType>(static_cast<int>(type)+1); -} - -// Termination function for determining the end of the LayerType enumeration. -template<typename FactoryType, armnn::DataType DataType, armnn::LayerType Type> -bool IsLayerSupportedTestsImpl(FactoryType *factory, Tag<armnn::LayerType::LastLayer>) -{ - return IsLayerSupportedTest<FactoryType, DataType, Type>(factory, Tag<Type>()); -}; - -// Recursive function to test and enter in the LayerType enum and then iterate on the next entry. -template<typename FactoryType, armnn::DataType DataType, armnn::LayerType Type> -bool IsLayerSupportedTestsImpl(FactoryType *factory, Tag<Type>) -{ - bool v = IsLayerSupportedTest<FactoryType, DataType, Type>(factory, Tag<Type>()); - - return v && - IsLayerSupportedTestsImpl<FactoryType, DataType, NextType(Type)> - (factory, Tag<NextType(Type)>()); -}; - -// Helper function to pass through to the test framework. 
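IsLayerSupportedTestsImpl above walks the whole armnn::LayerType enum at compile time: the Tag template turns each enumerator into a distinct overload, NextType() advances to the following enumerator, and the overload taking Tag<LastLayer> ends the recursion (the helper declared next simply starts it from FirstLayer). A self-contained toy version of the same pattern, using made-up names (Op, OpTag, NextOp, Visit) rather than ArmNN code:

#include <iostream>

enum class Op { Add, Mul, First = Add, Last = Mul };

template<Op V> struct OpTag {};

constexpr Op NextOp(Op v) { return static_cast<Op>(static_cast<int>(v) + 1); }

// Termination: the last enumerator is handled without recursing further.
void Visit(OpTag<Op::Last>)
{
    std::cout << "visiting " << static_cast<int>(Op::Last) << "\n";
}

// Recursive case: handle V, then move on to the next enumerator.
template<Op V>
void Visit(OpTag<V>)
{
    std::cout << "visiting " << static_cast<int>(V) << "\n";
    Visit(OpTag<NextOp(V)>());
}

int main()
{
    Visit(OpTag<Op::First>()); // visits Add (0), then Mul (1), then stops
}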
-template<typename FactoryType, armnn::DataType DataType> -bool IsLayerSupportedTests(FactoryType *factory) -{ - return IsLayerSupportedTestsImpl<FactoryType, DataType>(factory, Tag<armnn::LayerType::FirstLayer>()); -}; - -template<armnn::LayerType Type> -bool TestLayerTypeMatches() -{ - using LayerPolicy = LayerTypePolicy<Type, armnn::DataType::Float32>; - using LayerType = typename LayerPolicy::Type; - using LayerDesc = typename LayerPolicy::Desc; - DummyLayer<LayerType, LayerDesc> layer; - - std::stringstream ss; - ss << LayerPolicy::NameStr << " layer type mismatches expected layer type value."; - bool v = Type == layer.m_Layer->GetType(); - BOOST_CHECK_MESSAGE(v, ss.str()); - return v; -}; - -template<armnn::LayerType Type> -bool LayerTypeMatchesTestImpl(Tag<armnn::LayerType::LastLayer>) -{ - return TestLayerTypeMatches<Type>(); -}; - -template<armnn::LayerType Type> -bool LayerTypeMatchesTestImpl(Tag<Type>) -{ - return TestLayerTypeMatches<Type>() && - LayerTypeMatchesTestImpl<NextType(Type)>(Tag<NextType(Type)>()); -}; - -bool LayerTypeMatchesTest() -{ - return LayerTypeMatchesTestImpl<armnn::LayerType::FirstLayer>(Tag<armnn::LayerType::FirstLayer>()); -}; - -template<typename FactoryType, typename LayerType, armnn::DataType InputDataType , armnn::DataType OutputDataType> -bool IsConvertLayerSupportedTests(std::string& reasonIfUnsupported) -{ - armnn::Graph graph; - LayerType* const layer = graph.AddLayer<LayerType>("LayerName"); - - armnn::Layer* const input = graph.AddLayer<armnn::InputLayer>(0, "input"); - armnn::Layer* const output = graph.AddLayer<armnn::OutputLayer>(0, "output"); - - armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, InputDataType); - armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, OutputDataType); - - input->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); - input->GetOutputHandler(0).SetTensorInfo(inputTensorInfo); - layer->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - layer->GetOutputHandler(0).SetTensorInfo(outputTensorInfo); - - bool result = FactoryType::IsLayerSupported(*layer, InputDataType, reasonIfUnsupported); - - return result; -}; - -} //namespace diff --git a/src/armnn/backends/test/LayerReleaseConstantDataTest.cpp b/src/armnn/backends/test/LayerReleaseConstantDataTest.cpp deleted file mode 100644 index 7566c72352..0000000000 --- a/src/armnn/backends/test/LayerReleaseConstantDataTest.cpp +++ /dev/null @@ -1,212 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include <boost/test/unit_test.hpp> -#include <boost/cast.hpp> - -#include "backends/WorkloadData.hpp" -#include "Graph.hpp" - -#include <utility> - -#include "backends/CpuTensorHandle.hpp" -#include "backends/ClWorkloadFactory.hpp" - -using namespace armnn; -using namespace std; - -// connects two layers -void Connect(Layer* from, Layer* to, const TensorInfo& tensorInfo, unsigned int fromIndex = 0, unsigned int toIndex = 0) -{ - from->GetOutputSlot(fromIndex).Connect(to->GetInputSlot(toIndex)); - from->GetOutputHandler(fromIndex).SetTensorInfo(tensorInfo); -} - -///////////////////////////////////////////////////////////////////////////////////////////// -// The following test are created specifically to test ReleaseConstantData() method in the Layer -// They build very simple graphs including the layer will be checked. -// Checks weights and biases before the method called and after. 
-///////////////////////////////////////////////////////////////////////////////////////////// - -BOOST_AUTO_TEST_SUITE(LayerReleaseConstantDataTest) - -BOOST_AUTO_TEST_CASE(ReleaseBatchNormalizationLayerConstantDataTest) -{ - Graph graph; - ClWorkloadFactory factory; - - // create the layer we're testing - BatchNormalizationDescriptor layerDesc; - layerDesc.m_Eps = 0.05f; - BatchNormalizationLayer* const layer = graph.AddLayer<BatchNormalizationLayer>(layerDesc, "layer"); - - armnn::TensorInfo weightInfo({3}, armnn::DataType::Float32); - layer->m_Mean = std::make_unique<ScopedCpuTensorHandle>(weightInfo); - layer->m_Variance = std::make_unique<ScopedCpuTensorHandle>(weightInfo); - layer->m_Beta = std::make_unique<ScopedCpuTensorHandle>(weightInfo); - layer->m_Gamma = std::make_unique<ScopedCpuTensorHandle>(weightInfo); - layer->m_Mean->Allocate(); - layer->m_Variance->Allocate(); - layer->m_Beta->Allocate(); - layer->m_Gamma->Allocate(); - - // create extra layers - Layer* const input = graph.AddLayer<InputLayer>(0, "input"); - Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); - - // connect up - armnn::TensorInfo tensorInfo({2, 3, 1, 1}, armnn::DataType::Float32); - Connect(input, layer, tensorInfo); - Connect(layer, output, tensorInfo); - - // check the constants that they are not NULL - BOOST_CHECK(layer->m_Mean != nullptr); - BOOST_CHECK(layer->m_Variance != nullptr); - BOOST_CHECK(layer->m_Beta != nullptr); - BOOST_CHECK(layer->m_Gamma != nullptr); - - // free up the constants.. - layer->ReleaseConstantData(); - - // check the constants that they are NULL now - BOOST_CHECK(layer->m_Mean == nullptr); - BOOST_CHECK(layer->m_Variance == nullptr); - BOOST_CHECK(layer->m_Beta == nullptr); - BOOST_CHECK(layer->m_Gamma == nullptr); - - } - - - BOOST_AUTO_TEST_CASE(ReleaseConvolution2dLayerConstantDataTest) - { - Graph graph; - ClWorkloadFactory factory; - - // create the layer we're testing - Convolution2dDescriptor layerDesc; - layerDesc.m_PadLeft = 3; - layerDesc.m_PadRight = 3; - layerDesc.m_PadTop = 1; - layerDesc.m_PadBottom = 1; - layerDesc.m_StrideX = 2; - layerDesc.m_StrideY = 4; - layerDesc.m_BiasEnabled = true; - - Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer"); - - layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({2, 3, 5, 3}, - armnn::DataType::Float32)); - layer->m_Bias = std::make_unique<ScopedCpuTensorHandle> - (TensorInfo({2}, GetBiasDataType(armnn::DataType::Float32))); - - layer->m_Weight->Allocate(); - layer->m_Bias->Allocate(); - - // create extra layers - Layer* const input = graph.AddLayer<InputLayer>(0, "input"); - Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); - - // connect up - Connect(input, layer, TensorInfo({2, 3, 8, 16}, armnn::DataType::Float32)); - Connect(layer, output, TensorInfo({2, 2, 2, 10}, armnn::DataType::Float32)); - - // check the constants that they are not NULL - BOOST_CHECK(layer->m_Weight != nullptr); - BOOST_CHECK(layer->m_Bias != nullptr); - - // free up the constants.. 
- layer->ReleaseConstantData(); - - // check the constants that they are NULL now - BOOST_CHECK(layer->m_Weight == nullptr); - BOOST_CHECK(layer->m_Bias == nullptr); -} - -BOOST_AUTO_TEST_CASE(ReleaseDepthwiseConvolution2dLayerConstantDataTest) -{ - Graph graph; - ClWorkloadFactory factory; - - // create the layer we're testing - DepthwiseConvolution2dDescriptor layerDesc; - layerDesc.m_PadLeft = 3; - layerDesc.m_PadRight = 3; - layerDesc.m_PadTop = 1; - layerDesc.m_PadBottom = 1; - layerDesc.m_StrideX = 2; - layerDesc.m_StrideY = 4; - layerDesc.m_BiasEnabled = true; - - DepthwiseConvolution2dLayer* const layer = graph.AddLayer<DepthwiseConvolution2dLayer>(layerDesc, "layer"); - - layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({3, 3, 5, 3}, DataType::Float32)); - layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({9}, DataType::Float32)); - layer->m_Weight->Allocate(); - layer->m_Bias->Allocate(); - - // create extra layers - Layer* const input = graph.AddLayer<InputLayer>(0, "input"); - Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); - - // connect up - Connect(input, layer, TensorInfo({2, 3, 8, 16}, armnn::DataType::Float32)); - Connect(layer, output, TensorInfo({2, 9, 2, 10}, armnn::DataType::Float32)); - - // check the constants that they are not NULL - BOOST_CHECK(layer->m_Weight != nullptr); - BOOST_CHECK(layer->m_Bias != nullptr); - - // free up the constants.. - layer->ReleaseConstantData(); - - // check the constants that they are NULL now - BOOST_CHECK(layer->m_Weight == nullptr); - BOOST_CHECK(layer->m_Bias == nullptr); -} - -BOOST_AUTO_TEST_CASE(ReleaseFullyConnectedLayerConstantDataTest) -{ - Graph graph; - ClWorkloadFactory factory; - - // create the layer we're testing - FullyConnectedDescriptor layerDesc; - layerDesc.m_BiasEnabled = true; - layerDesc.m_TransposeWeightMatrix = true; - - FullyConnectedLayer* const layer = graph.AddLayer<FullyConnectedLayer>(layerDesc, "layer"); - - float inputsQScale = 1.0f; - float outputQScale = 2.0f; - - layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({7, 20}, - DataType::QuantisedAsymm8, inputsQScale, 0)); - layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({7}, - GetBiasDataType(DataType::QuantisedAsymm8), inputsQScale)); - layer->m_Weight->Allocate(); - layer->m_Bias->Allocate(); - - // create extra layers - Layer* const input = graph.AddLayer<InputLayer>(0, "input"); - Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); - - // connect up - Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType::QuantisedAsymm8, inputsQScale)); - Connect(layer, output, TensorInfo({3, 7}, DataType::QuantisedAsymm8, outputQScale)); - - // check the constants that they are not NULL - BOOST_CHECK(layer->m_Weight != nullptr); - BOOST_CHECK(layer->m_Bias != nullptr); - - // free up the constants.. - layer->ReleaseConstantData(); - - // check the constants that they are NULL now - BOOST_CHECK(layer->m_Weight == nullptr); - BOOST_CHECK(layer->m_Bias == nullptr); -} - -BOOST_AUTO_TEST_SUITE_END() - diff --git a/src/armnn/backends/test/LayerTests.cpp b/src/armnn/backends/test/LayerTests.cpp deleted file mode 100644 index 4dcc36fdb2..0000000000 --- a/src/armnn/backends/test/LayerTests.cpp +++ /dev/null @@ -1,4750 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#include "LayerTests.hpp" - -#include "test/TensorHelpers.hpp" -#include "TensorCopyUtils.hpp" -#include "Permute.hpp" - -#include <boost/test/unit_test.hpp> -#include <boost/assert.hpp> - -#include "armnn/LayerSupport.hpp" - -#include "backends/CpuTensorHandle.hpp" -#include "backends/WorkloadFactory.hpp" - -#ifdef ARMCOMPUTECL_ENABLED -#include "backends/ClTensorHandle.hpp" -#include "backends/ArmComputeTensorUtils.hpp" -#endif - -#include <algorithm> -#include <boost/cast.hpp> - -#include "WorkloadTestUtils.hpp" -#include "Conv2dTestImpl.hpp" -#include "BatchNormTestImpl.hpp" -#include "ActivationTestImpl.hpp" -#include "Pooling2dTestImpl.hpp" -#include "ReshapeTestImpl.hpp" -#include "FullyConnectedTestImpl.hpp" -#include "SplitterTestImpl.hpp" -#include "SoftmaxTestImpl.hpp" -#include "NormTestImpl.hpp" -#include "PermuteTestImpl.hpp" -#include "LstmTestImpl.hpp" -#include "ConvertFp16ToFp32TestImpl.hpp" -#include "ConvertFp32ToFp16TestImpl.hpp" - -// 3-channel 16x8 image used as common input data for a number of Conv2d tests. -static std::vector<float> ConvInput3x8x16({ - 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, - 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, - 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, - 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, - 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, - 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 -}); - -// 2-channel bias used by a number of Conv2d tests. -static std::vector<float> Bias2({0, 2}); - -// Helper function that returns either Bias2 or an empty vector depending on whether bias is enabled. 
-template<typename T> -boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale, int32_t qOffset) -{ - if(biasEnabled) - { - armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias2.size())}, armnn::GetDataType<T>()); - boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, qOffset, Bias2)); - return bias; - } - else - { - return boost::multi_array<T, 1>(); - } -} - -template<typename T> -LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale, - int32_t qOffset, - bool biasEnabled) -{ - // Use common single-batch 3-channel 16x8 image. - armnn::TensorInfo inputDesc({1, 3, 8, 16}, armnn::GetDataType<T>()); - boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16)); - - // Use a 2-element batch with 3-channel 3x5 kernels. - armnn::TensorInfo kernelDesc({2, 3, 5, 3}, armnn::GetDataType<T>()); - boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( - QuantizedVector<T>(qScale, qOffset, { - 1, 1, 1, - 1, -1, 1, - 1, 1, 1, - 1, 1, 1, - 1, 1, 1, - - 0, 0, 0, - 0, 0, 0, - 0, 0, 0, - 0, 0, 0, - 0, 0, 0, - - 2, 2, 2, - 2, 2, 2, - 2, 2, 2, - 2, 2, 2, - 2, 2, 2, - - - 0, 0, 0, - 0, 0, 0, - 0, 0, 0, - 0, 0, 0, - 0, 0, 0, - - 1, 1, 1, - 1, 1, 1, - 1, 1, 1, - 1, 1, 1, - 1, 1, 1, - - 0, 0, 0, - 0, 0, 0, - 0, 0, 0, - 0, 0, 0, - 0, 0, 0 - }))); - - // Expected output is 2 batch elements of a 1-channel 14x4 image. - armnn::TensorInfo outputDesc({1, 2, 4, 14}, armnn::GetDataType<T>()); - boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>( - QuantizedVector<T>(qScale, qOffset, { - -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, - -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, - -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, - -23.5f, -23.5f, -23.5f, - -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, - -23.5f, -23.5f, -23.5f, - - 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }))); - - return SimpleConvolution2dTestImpl<T>(workloadFactory, - input, - kernel, - GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasEnabled, qScale, qOffset), - expectedOutput, - qScale, - qOffset); -} - -template<typename T> -LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale, - int32_t qOffset, - bool biasEnabled) -{ - // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path. - - // Use common single-batch 3-channel 16x8 image. - armnn::TensorInfo inputDesc({1, 3, 8, 16}, armnn::GetDataType<T>()); - boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16)); - - // Use a 2-element batch of 3-channel 3x3 kernels. - armnn::TensorInfo kernelDesc({2, 3, 3, 3}, armnn::GetDataType<T>()); - boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( - QuantizedVector<T>(qScale, qOffset, { - 1, 1, 1, - 1, -1, 1, - 1, 1, 1, - - 0, 0, 0, - 0, 0, 0, - 0, 0, 0, - - 2, 2, 2, - 2, 2, 2, - 2, 2, 2, - - - 0, 0, 0, - 0, 0, 0, - 0, 0, 0, - - 1, 1, 1, - 1, 1, 1, - 1, 1, 1, - - 0, 0, 0, - 0, 0, 0, - 0, 0, 0 - }))); - - // Expected output is 1 batch of a 2-channel 14x6 image. 
- armnn::TensorInfo outputDesc({1, 2, 6, 14}, armnn::GetDataType<T>()); - boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>( - QuantizedVector<T>(qScale, qOffset, { - -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, - -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, - -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f, - -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f, - -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f, - -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f, - - 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }))); - - return SimpleConvolution2dTestImpl<T>(workloadFactory, - input, - kernel, - GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasEnabled, qScale, qOffset), - expectedOutput, - qScale, - qOffset); -} - -LayerTestResult<float, 4> SimpleConvolution2d3x5Test(armnn::IWorkloadFactory& workloadFactory, - bool biasEnabled) -{ - return SimpleConvolution2d3x5TestCommon<float>(workloadFactory, 0.f, 0, biasEnabled); -} - -LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(armnn::IWorkloadFactory& workloadFactory, - bool biasEnabled) -{ - return SimpleConvolution2d3x5TestCommon<uint8_t>(workloadFactory, 0.5f, 50, biasEnabled); -} - -LayerTestResult<float, 4> SimpleConvolution2d3x3Test(armnn::IWorkloadFactory& workloadFactory, - bool biasEnabled) -{ - return SimpleConvolution2d3x3TestCommon<float>(workloadFactory, 0.f, 0, biasEnabled); -} - -LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(armnn::IWorkloadFactory& workloadFactory, - bool biasEnabled) -{ - return SimpleConvolution2d3x3TestCommon<uint8_t>(workloadFactory, 0.5f, 50, biasEnabled); -} - -template<typename T> -LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon( - armnn::IWorkloadFactory& workloadFactory, - float qScale, - int32_t qOffset) -{ - // Use a single-batch 1-channel 3x3 image as input. - armnn::TensorInfo inputDesc({1, 1, 3, 3}, armnn::GetDataType<T>()); - boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>( - QuantizedVector<T>(qScale, qOffset, { - 11,21,31, - 12,22,32, - 13,23,33 - }))); - - // Use 1 batch of a 1-channel 2x2 kernel. - armnn::TensorInfo kernelDesc({1, 1, 2, 2}, armnn::GetDataType<T>()); - boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( - QuantizedVector<T>(qScale, qOffset, { - -11,-21, - -12,-22, - }))); - -// Expected output is 1 batch of a 1-channel 6x8 image. -// Manually calculated like this: -//[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..] -//[-11*0 -21*0 -12*0 -22*11 ; -11*0 -21*0 -12*11 -22*21 ; -11*0 -21*0 -12*21 -22*31 ; -11*0 -21*0 -12*31 -22*0 ..] -//[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..] -//[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..] -//[-11*0 -21*13 -12*0 -22*0 ; -11*13 -21*23 -12*0 -22*0 ; -11*23 -21*33 -12*0 -22*0 ; -11*33 -21*0 -12*0 -22*0 ..] 
-//[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..] -//[..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ..] - armnn::TensorInfo outputDesc({1, 1, 8, 6}, armnn::GetDataType<T>()); - boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>( - QuantizedVector<T>(qScale, qOffset, { - 0, 0, 0, 0, 0, 0, - -242, -594, -934, -372, 0, 0, - -495, -1190, -1850, -725, 0, 0, - -538, -1256, -1916, -748, 0, 0, - -273, -626, -946, -363, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0 - }))); - - return SimpleConvolution2dTestImpl<T>(workloadFactory, - input, - kernel, - GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(false, qScale, qOffset), - expectedOutput, - qScale, - qOffset, - 1, // Padding left. - 2, // Padding top. - 3, // Padding right. - 4); // Padding bottom. -} - -template<typename T> -LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale, - int32_t qOffset) -{ - // Use a single-batch 1-channel 5x5 image as input. - armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, armnn::GetDataType<T>()); - boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>( - QuantizedVector<T>(qScale, qOffset, { - 11,21,31,41,51, - 12,22,32,42,52, - 13,23,33,43,53, - 14,24,34,44,54, - 15,25,35,45,55, - }))); - - // Use 1 batch of a 1-channel 4x4 kernel. - armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); - boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( - QuantizedVector<T>(qScale, qOffset, { - -11,-21,-31,-41, - -12,-22,-32,-42, - -13,-23,-33,-43, - -14,-24,-34,-44, - }))); - - // Expected output is 1 batch of a 1-channel 5x5 image. - armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, armnn::GetDataType<T>()); - std::vector<T> myVec(outputDesc.GetNumElements(), 0); - boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>( - QuantizedVector<T>(qScale, qOffset, { - -7140, -10580, -13940, -9300, -5230, - -9590, -14120, -18520, -12290, -6860, - -9980, -14560, -18960, -12560, -7000, - -7518, -10904, -14144, -9318, -5152, - -5032, -7256, -9376, -6142, -3368, - }))); - - return SimpleConvolution2dTestImpl<T>(workloadFactory, - input, - kernel, - GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(false, qScale, qOffset), - expectedOutput, - qScale, - qOffset, - 1, // Padding left. - 1, // Padding top. - 2, // Padding right. - 2); // Padding bottom. -} - -template<typename T> -LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale, - int32_t qOffset, - bool biasEnabled) -{ - // Use a single-batch 2-channel 5x5 image as input. - armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, armnn::GetDataType<T>()); - auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>( - QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), { - 0, 1, 2, 3, 4, - 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, - - 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, - 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49 - }))); - - // Use a depth multiplier of 1 on a 2-channel 4x4 kernel. 
- armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, armnn::GetDataType<T>()); - auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>( - QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(), { - 32, 31, 30, 29, - 28, 27, 26, 25, - 24, 23, 22, 21, - 20, 19, 18, 17, - - 16, 15, 14, 13, - 12, 11, 10, 9, - 8, 7, 6, 5, - 4, 3, 2, 1 - }))); - - // Expected output is 1 batch of a 2-channel 5x5 image. - // Calculated using the python tensorflow library with strideX=1, strideY=1. - armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, armnn::GetDataType<T>()); - boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>( - QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), { - 1062, 1580, 1850, 1530, 1117, - 2140, 3108, 3500, 2842, 2042, - 3580, 5068, 5460, 4342, 3062, - 3618, 5072, 5390, 4248, 2971, - 3074, 4282, 4510, 3533, 2457, - 1550, 2284, 2362, 1955, 1428, - 2910, 4206, 4342, 3528, 2536, - 3390, 4886, 5022, 4068, 2916, - 3566, 5056, 5182, 4133, 2922, - 3100, 4352, 4452, 3517, 2465 - }))); - - return DepthwiseConvolution2dAsymmetricTestImpl<T>(workloadFactory, - input, - kernel, - GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasEnabled, qScale, qOffset), - expectedOutput, - qScale, - qOffset, - 1, // Padding left. - 1, // Padding top. - 2, // Padding right. - 2, // Padding bottom. - 1, // strideX - 1); // strideY -} - -LayerTestResult<float, 4> -Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(armnn::IWorkloadFactory& workloadFactory) -{ - return Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon<float>(workloadFactory, 0.0f, 0); -} - -LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(armnn::IWorkloadFactory& workloadFactory) -{ - return SimpleConvolution2dAsymmetricPaddingTestCommon<float>(workloadFactory, 0.0f, 0); -} - -LayerTestResult<float, 4> DepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, - bool biasEnabled) -{ - return DepthwiseConvolution2dTestImpl<float, float>(workloadFactory, 0.0f, 0, biasEnabled); -} - -LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(armnn::IWorkloadFactory& workloadFactory, - bool biasEnabled) -{ - return DepthwiseConvolution2dDepthMul1TestImpl<float, float>(workloadFactory, 0.0f, 0, biasEnabled); -} - -LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(armnn::IWorkloadFactory& workloadFactory, - bool biasEnabled) -{ - return DepthwiseConvolution2dAsymmetricTestCommon<float>(workloadFactory, 0.0f, 0, biasEnabled); -} - -LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(armnn::IWorkloadFactory& workloadFactory, - bool biasEnabled) -{ - return DepthwiseConvolution2dTestImpl<uint8_t, int32_t>(workloadFactory, 0.5f, 50, biasEnabled); -} - -LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(armnn::IWorkloadFactory& workloadFactory, - bool biasEnabled) -{ - return DepthwiseConvolution2dDepthMul1TestImpl<uint8_t, int32_t>(workloadFactory, 0.5f, 50, biasEnabled); -} - -LayerTestResult<float, 4> Convolution1dTest(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled) -{ - return Convolution1dTestImpl<float>(workloadFactory, 0.0f, 0, biasEnabled); -} - -LayerTestResult<uint8_t, 4> Convolution1dUint8Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled) -{ - return Convolution1dTestImpl<uint8_t>(workloadFactory, 0.1f, 128, biasEnabled); -} - -LayerTestResult<float,4> 
CompareConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory) -{ - return CompareConvolution2dTestImpl<float>(workloadFactory, refWorkloadFactory); -} - -template<typename T> -LayerTestResult<T,4> CompareDepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory) -{ - return CompareDepthwiseConvolution2dTestImpl<T>(workloadFactory, refWorkloadFactory); -} - -template LayerTestResult<float, 4> CompareDepthwiseConvolution2dTest<float>( - armnn::IWorkloadFactory&, armnn::IWorkloadFactory&); -template LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dTest<uint8_t>( - armnn::IWorkloadFactory&, armnn::IWorkloadFactory&); - -LayerTestResult<float,4> SimpleNormalizationAcrossTest(armnn::IWorkloadFactory& workloadFactory) -{ - auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness; - auto normChannel = armnn::NormalizationAlgorithmChannel::Across; - return SimpleNormalizationTestImpl(workloadFactory, normChannel, normMethod); -} - -LayerTestResult<float,4> SimpleNormalizationWithinTest(armnn::IWorkloadFactory& workloadFactory) -{ - auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness; - auto normChannel = armnn::NormalizationAlgorithmChannel::Within; - return SimpleNormalizationTestImpl(workloadFactory, normChannel, normMethod); -} - -LayerTestResult<float,2> SimpleSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, float beta) -{ - return SimpleSoftmaxTestImpl<float>(workloadFactory, beta); -} - -LayerTestResult<uint8_t,2> SimpleSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory, float beta) -{ - return SimpleSoftmaxTestImpl<uint8_t>(workloadFactory, beta); -} - -LayerTestResult<float,4> CompareNormalizationTest(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory, - armnn::NormalizationAlgorithmChannel normChannel, - armnn::NormalizationAlgorithmMethod normMethod) -{ - return CompareNormalizationTestImpl(workloadFactory, refWorkloadFactory, normChannel, normMethod); -} - -LayerTestResult<float,2> CompareSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory, - float beta) -{ - return CompareSoftmaxTestImpl<float>(workloadFactory, refWorkloadFactory, beta); -} - -LayerTestResult<uint8_t,2> CompareSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory, - float beta) -{ - return CompareSoftmaxTestImpl<uint8_t>(workloadFactory, refWorkloadFactory, beta); -} - -std::vector<LayerTestResult<float,3>> SplitterTest(armnn::IWorkloadFactory& workloadFactory) -{ - return SplitterTestCommon<float>(workloadFactory); -} - -std::vector<LayerTestResult<uint8_t,3>> SplitterUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return SplitterTestCommon<uint8_t>(workloadFactory, 1.0f, 0); -} - -LayerTestResult<float, 3> CopyViaSplitterTest(armnn::IWorkloadFactory& workloadFactory) -{ - return CopyViaSplitterTestImpl<float>(workloadFactory, 0.0f, 0); -} - -LayerTestResult<uint8_t, 3> CopyViaSplitterUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return CopyViaSplitterTestImpl<uint8_t>(workloadFactory, 1.0f, 0); -} - -LayerTestResult<float, 2> LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest( - armnn::IWorkloadFactory& workloadFactory) -{ - armnn::TensorInfo inputDesc({ 2, 2 }, armnn::GetDataType<float>()); - boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>( - { 2., 3., 3., 4. 
})); - - armnn::TensorInfo outputDesc({ 2, 4 }, armnn::GetDataType<float>()); - boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>( - {-0.36444446f, -0.00352185f, 0.12886585f, -0.05163646f, - -0.42734814f, -0.00478661f, 0.13455015f, -0.03560682f})); - return LstmLayerWithCifgWithPeepholeNoProjectionTestImpl(workloadFactory, input, expectedOutput); -} - -LayerTestResult<float, 2> LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest( - armnn::IWorkloadFactory& workloadFactory) -{ - armnn::TensorInfo inputDesc({ 2, 5 }, armnn::GetDataType<float>()); - boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>( - {0.787926f, 0.151646f, 0.071352f, 0.118426f, 0.458058f, - 0.295743f, 0.544053f, 0.690064f, 0.858138f, 0.497181f})); - - armnn::TensorInfo outputDesc({ 2, 16 }, armnn::GetDataType<float>()); - boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>( - {-0.00396806f, 0.029352f, -0.00279226f, 0.0159977f, -0.00835576f, - -0.0211779f, 0.0283512f, -0.0114597f, 0.00907307f, -0.0244004f, - -0.0152191f, -0.0259063f, 0.00914318f, 0.00415118f, 0.017147f, - 0.0134203f, -0.013869f, 0.0287268f, -0.00334693f, 0.00733398f, -0.0287926f, - -0.0186926f, 0.0193662f, -0.0115437f, 0.00422612f, -0.0345232f, - 0.00223253f, -0.00957321f, 0.0210624f, 0.013331f, 0.0150954f, - 0.02168f})); - return LstmLayerFloat32NoCifgWithPeepholeWithProjectionTestImpl(workloadFactory, input, expectedOutput); -} - -LayerTestResult<float, 2> LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest(armnn::IWorkloadFactory& workloadFactory) -{ - armnn::TensorInfo inputDesc({2, 2}, armnn::GetDataType<float>()); - boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>( - {2., 3., 3., 4.})); - - - armnn::TensorInfo outputDesc({2, 4}, armnn::GetDataType<float>()); - boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>( - {{-0.02973187f, 0.1229473f, 0.20885126f, -0.15358765f, - -0.0185422f, 0.11281417f, 0.24466537f, -0.1826292f}})); - - return LstmNoCifgNoPeepholeNoProjectionTestImpl(workloadFactory, input, expectedOutput); -} - -LayerTestResult<float,3> MergerTest(armnn::IWorkloadFactory& workloadFactory) -{ - unsigned int outputWidth = 3; - unsigned int outputHeight = 6; - unsigned int outputChannels = 3; - - unsigned int inputWidth1 = 3; - unsigned int inputHeight1 = 6; - unsigned int inputChannels1 = 2; - - unsigned int inputWidth2 = 3; - unsigned int inputHeight2 = 6; - unsigned int inputChannels2 = 1; - - // Define the tensor descriptors. 
- armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::Float32); - armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::Float32); - armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::Float32); - - LayerTestResult<float,3> ret(outputTensorInfo); - - ret.outputExpected = MakeTensor<float, 3>(outputTensorInfo, std::vector<float>( - { - 1.0f, 2.0f, 3.0f, - 4.0f, 5.0f, 6.0f, - 7.0f, 8.0f, 9.0f, - 10.0f, 11.0f, 12.0f, - 13.0f, 14.0f, 15.0f, - 16.0f, 17.0f, 18.0f, - - 19.0f, 20.0f, 21.0f, - 22.0f, 23.0f, 24.0f, - 25.0f, 26.0f, 27.0f, - 28.0f, 29.0f, 30.0f, - 31.0f, 32.0f, 33.0f, - 34.0f, 35.0f, 36.0f, - - 37.0f, 38.0f, 39.0f, - 40.0f, 41.0f, 42.0f, - 43.0f, 44.0f, 45.0f, - 46.0f, 47.0f, 48.0f, - 49.0f, 50.0f, 51.0f, - 52.0f, 53.0f, 54.0f, - }) - ); - - auto input1 = MakeTensor<float, 3>(inputTensorInfo1, std::vector<float>( - { - 1.0f, 2.0f, 3.0f, - 4.0f, 5.0f, 6.0f, - 7.0f, 8.0f, 9.0f, - 10.0f, 11.0f, 12.0f, - 13.0f, 14.0f, 15.0f, - 16.0f, 17.0f, 18.0f, - - 19.0f, 20.0f, 21.0f, - 22.0f, 23.0f, 24.0f, - 25.0f, 26.0f, 27.0f, - 28.0f, 29.0f, 30.0f, - 31.0f, 32.0f, 33.0f, - 34.0f, 35.0f, 36.0f, - }) - ); - - auto input2 = MakeTensor<float, 3>(inputTensorInfo2, std::vector<float>( - { - 37.0f, 38.0f, 39.0f, - 40.0f, 41.0f, 42.0f, - 43.0f, 44.0f, 45.0f, - 46.0f, 47.0f, 48.0f, - 49.0f, 50.0f, 51.0f, - 52.0f, 53.0f, 54.0f, - }) - ); - - std::vector<unsigned int> wOrigin1 = {0, 0, 0}; //Extent of the window is defined by size of input[0]. - armnn::MergerQueueDescriptor::ViewOrigin window1(wOrigin1); - - std::vector<unsigned int> wOrigin2 = {2, 0, 0}; //Extent of the window is defined by size of input[1]. - armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2); - - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - bool subTensorsSupported = workloadFactory.SupportsSubTensors(); - - std::unique_ptr<armnn::ITensorHandle> inputHandle1 = - subTensorsSupported ? - workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) : - workloadFactory.CreateTensorHandle(inputTensorInfo1); - - std::unique_ptr<armnn::ITensorHandle> inputHandle2 = - subTensorsSupported ? 
- workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) : - workloadFactory.CreateTensorHandle(inputTensorInfo2); - - armnn::MergerQueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); - AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - data.m_ViewOrigins.push_back(window1); - data.m_ViewOrigins.push_back(window2); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(data, info); - - inputHandle1->Allocate(); - inputHandle2->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]); - CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get()); - - return ret; -} - -LayerTestResult<float,4> AdditionTest(armnn::IWorkloadFactory& workloadFactory) -{ - unsigned int batchSize = 2; - unsigned int channels = 2; - unsigned int height = 2; - unsigned int width = 3; - - armnn::TensorInfo inputTensorInfo1, inputTensorInfo2; - armnn::TensorInfo outputTensorInfo; - - unsigned int shape[] = {batchSize, channels, height, width}; - - inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); - inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); - outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); - - - auto input1 = MakeTensor<float, 4>(inputTensorInfo1, std::vector<float>( - { - 0.0f, 2.0f, 1.0f, - 0.2f, 1.0f, 2.0f, - - 1.0f, 2.0f, 1.0f, - 0.2f, 1.0f, 2.0f, - - 0.0f, 2.0f, 1.0f, - 4.2f, 1.0f, 2.0f, - - 0.0f, 0.0f, 1.0f, - 0.2f, 1.0f, 2.0f, - })); - - auto input2 = MakeTensor<float, 4>(inputTensorInfo2, std::vector<float>( - { - 1.0f, 2.0f, 1.0f, - 0.0f, 1.0f, 2.0f, - - 1.0f, 2.0f, -2.0f, - 0.2f, 1.0f, 2.0f, - - 0.0f, 2.0f, 1.0f, - 4.2f, 0.0f, -3.0f, - - 0.0f, 0.0f, 1.0f, - 0.7f, 1.0f, 5.0f, - })); - - LayerTestResult<float,4> ret(outputTensorInfo); - ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>( - { - 1.0f, 4.0f, 2.0f, - 0.2f, 2.0f, 4.0f, - - 2.0f, 4.0f, -1.0f, - 0.4f, 2.0f, 4.0f, - - 0.0f, 4.0f, 2.0f, - 8.4f, 1.0f, -1.0f, - - 0.0f, 0.0f, 2.0f, - 0.9f, 2.0f, 7.0f, - })); - - std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); - std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::AdditionQueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); - AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info); - - inputHandle1->Allocate(); - inputHandle2->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); - CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - - return ret; -} - -template <typename T> -LayerTestResult<T, 4> 
AdditionBroadcastTestImpl(armnn::IWorkloadFactory& workloadFactory, - float qScale, - int32_t qOffset) -{ - armnn::TensorInfo inputTensorInfo1 = armnn::TensorInfo({1, 3, 2, 1}, armnn::GetDataType<T>()); - armnn::TensorInfo inputTensorInfo2 = armnn::TensorInfo({1, 1, 2, 3}, armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo = armnn::TensorInfo({1, 3, 2, 3}, armnn::GetDataType<T>()); - - if (armnn::IsQuantizedType<T>()) - { - inputTensorInfo1.SetQuantizationScale(qScale); - inputTensorInfo1.SetQuantizationOffset(qOffset); - inputTensorInfo2.SetQuantizationScale(qScale); - inputTensorInfo2.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - } - - auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset, - { - 0.0f, - 1.0f, - - 2.0f, - 3.0f, - - 4.0f, - 5.0f, - })); - - auto input2 = MakeTensor<T, 4>(inputTensorInfo2, QuantizedVector<T>(qScale, qOffset, - { - 0.5f, 1.5f, 2.5f, - 3.5f, 4.5f, 5.5f, - })); - - LayerTestResult<T,4> ret(outputTensorInfo); - ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, - { - 0.5f, 1.5f, 2.5f, - 4.5f, 5.5f, 6.5f, - - 2.5f, 3.5f, 4.5f, - 6.5f, 7.5f, 8.5f, - - 4.5f, 5.5f, 6.5f, - 8.5f, 9.5f, 10.5f, - })); - - std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); - std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::AdditionQueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); - AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info); - - inputHandle1->Allocate(); - inputHandle2->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); - CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - - return ret; -} - -template <typename T> -LayerTestResult<T, 4> AdditionBroadcast1ElementTestImpl(armnn::IWorkloadFactory& workloadFactory, - float qScale, - int32_t qOffset) -{ - armnn::TensorInfo inputTensorInfo1 = armnn::TensorInfo({1, 3, 2, 3}, armnn::GetDataType<T>()); - armnn::TensorInfo inputTensorInfo2 = armnn::TensorInfo({1, 1, 1, 1}, armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo = armnn::TensorInfo({1, 3, 2, 3}, armnn::GetDataType<T>()); - - if (armnn::IsQuantizedType<T>()) - { - inputTensorInfo1.SetQuantizationScale(qScale); - inputTensorInfo1.SetQuantizationOffset(qOffset); - inputTensorInfo2.SetQuantizationScale(qScale); - inputTensorInfo2.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - } - - auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset, - { - 0.0f, 1.0f, 2.0f, - 3.0f, 4.0f, 5.0f, - 6.0f, 7.0f, 8.0f, - 9.0f, 10.0f, 11.0f, - 12.0f, 13.0f, 14.0f, - 15.0f, 16.0f, 17.0f, - })); - - auto input2 = MakeTensor<T, 4>(inputTensorInfo2, QuantizedVector<T>(qScale, qOffset, - { - 0.5f, - })); - - LayerTestResult<T,4> 
ret(outputTensorInfo); - ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, - { - 0.5f, 1.5f, 2.5f, - 3.5f, 4.5f, 5.5f, - 6.5f, 7.5f, 8.5f, - 9.5f, 10.5f, 11.5f, - 12.5f, 13.5f, 14.5f, - 15.5f, 16.5f, 17.5f, - })); - - std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); - std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::AdditionQueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); - AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info); - - inputHandle1->Allocate(); - inputHandle2->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); - CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - - return ret; -} - -LayerTestResult<float, 4> AdditionBroadcastTest(armnn::IWorkloadFactory& workloadFactory) -{ - return AdditionBroadcastTestImpl<float>(workloadFactory, 0.0f, 0); -} - -LayerTestResult<uint8_t, 4> AdditionBroadcastUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return AdditionBroadcastTestImpl<uint8_t>(workloadFactory, 2.f, 0); -} - -LayerTestResult<float, 4> AdditionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory) -{ - return AdditionBroadcast1ElementTestImpl<float>(workloadFactory, 0.0f, 0); -} - -LayerTestResult<uint8_t, 4> AdditionBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return AdditionBroadcast1ElementTestImpl<uint8_t>(workloadFactory, 0.1333333f, 128); -} - -LayerTestResult<float,4> CompareAdditionTest(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory) -{ - unsigned int batchSize = 4; - unsigned int channels = 1; - unsigned int height = 2; - unsigned int width = 3; - - armnn::TensorInfo inputTensorInfo1, inputTensorInfo2; - armnn::TensorInfo outputTensorInfo; - - unsigned int shape[] = {batchSize, channels, height, width}; - - inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); - inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); - outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); - - auto input1 = MakeRandomTensor<float, 4>(inputTensorInfo1, 1232); - auto input2 = MakeRandomTensor<float, 4>(inputTensorInfo2, 456); - - LayerTestResult<float,4> ret(outputTensorInfo); - - std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); - std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - std::unique_ptr<armnn::ITensorHandle> inputHandle1Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo1); - std::unique_ptr<armnn::ITensorHandle> inputHandle2Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo2); - std::unique_ptr<armnn::ITensorHandle> outputHandleRef = 
refWorkloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::AdditionQueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); - AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - armnn::AdditionQueueDescriptor refData = data; - armnn::WorkloadInfo refInfo = info; - SetWorkloadInput(refData, refInfo, 0, inputTensorInfo1, inputHandle1Ref.get()); - SetWorkloadInput(refData, refInfo, 1, inputTensorInfo2, inputHandle2Ref.get()); - SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info); - std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateAddition(refData, refInfo); - - inputHandle1->Allocate(); - inputHandle2->Allocate(); - outputHandle->Allocate(); - inputHandle1Ref->Allocate(); - inputHandle2Ref->Allocate(); - outputHandleRef->Allocate(); - - CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); - CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); - CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]); - CopyDataToITensorHandle(inputHandle2Ref.get(), &input2[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - refWorkloadFactory.Finalize(); - workloadRef->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); - - return ret; -} - -namespace { -template <typename T> -LayerTestResult<T, 4> DivisionTestHelper(armnn::IWorkloadFactory& workloadFactory, - const unsigned int shape0[4], - const std::vector<T>& values0, - float scale0, - int32_t offset0, - const unsigned int shape1[4], - const std::vector<T> & values1, - float scale1, - int32_t offset1, - const unsigned int outShape[4], - const std::vector<T> & outValues, - float outScale, - int32_t outOffset) -{ - auto dataType = (std::is_same<T, uint8_t>::value ? 
- armnn::DataType::QuantisedAsymm8 : - armnn::DataType::Float32); - - armnn::TensorInfo inputTensorInfo0(4, shape0, dataType); - armnn::TensorInfo inputTensorInfo1(4, shape1, dataType); - armnn::TensorInfo outputTensorInfo(4, outShape, dataType); - - inputTensorInfo0.SetQuantizationScale(scale0); - inputTensorInfo0.SetQuantizationOffset(offset0); - - inputTensorInfo1.SetQuantizationScale(scale1); - inputTensorInfo1.SetQuantizationOffset(offset1); - - outputTensorInfo.SetQuantizationScale(outScale); - outputTensorInfo.SetQuantizationOffset(outOffset); - - auto input0 = MakeTensor<T, 4>(inputTensorInfo0, values0); - auto input1 = MakeTensor<T, 4>(inputTensorInfo1, values1); - - LayerTestResult<T, 4> result(outputTensorInfo); - result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outValues); - - std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0); - std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::DivisionQueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get()); - AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDivision(data, info); - - inputHandle0->Allocate(); - inputHandle1->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]); - CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - - return result; -} -} // anonymous namespace - -LayerTestResult<float,4> DivisionByZeroTest(armnn::IWorkloadFactory& workloadFactory) -{ - const unsigned int width = 2; - const unsigned int height = 2; - const unsigned int channelCount = 2; - const unsigned int batchSize = 2; - - unsigned int shape[] = { batchSize, channelCount, height, width }; - - std::vector<float> input0({ - 1.f, 1.f, 1.f, 1.f, 0.f, 0.f, 0.f, 0.f, - -1.f, -1.f, -1.f, -1.f, 5.f, 5.f, 5.f, 5.f }); - - std::vector<float> input1({ - 0.f, 0.f, -0.f, -0.f, 0.f, 0.f, -0.f, -0.f, - 0.f, 0.f, -0.f, -0.f, 5.f, 5.f, 5.f, 5.f }); - - std::vector<float> output({ - INFINITY, INFINITY, -INFINITY, -INFINITY, NAN, NAN, -NAN, -NAN, - -INFINITY, -INFINITY, INFINITY, INFINITY, 1, 1, 1, 1 }); - - return DivisionTestHelper<float>(workloadFactory, - shape, input0, 1.0f, 0, - shape, input1, 1.0f, 0, - shape, output, 1.0f, 0); -} - -LayerTestResult<float,4> DivisionTest(armnn::IWorkloadFactory& workloadFactory) -{ - const unsigned int width = 2; - const unsigned int height = 2; - const unsigned int channelCount = 2; - const unsigned int batchSize = 2; - - unsigned int shape[] = { batchSize, channelCount, height, width }; - - std::vector<float> input0({ - 2, 2, 2, 2, 3, 3, 3, 3, - 4, 4, 4, 4, 5, 5, 5, 5 }); - - std::vector<float> input1({ - 1, 1, 1, 1, 2, 2, 2, 2, - 4, 4, 4, 4, 4, 4, 4, 4 }); - - std::vector<float> output({ - 2, 2, 2, 2, 1.5, 1.5, 1.5, 1.5, - 1, 1, 1, 1, 1.25, 1.25, 1.25, 1.25 }); - - - return DivisionTestHelper<float>(workloadFactory, - shape, input0, 1.0f, 0, - shape, input1, 1.0f, 0, - shape, output, 1.0f, 0); -} - -LayerTestResult<float, 4> 
DivisionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory) -{ - unsigned int shape0[] = { 1, 2, 2, 2 }; - std::vector<float> input0({ 2, 4, 6, 8, 10, 12, 14, 16}); - - unsigned int shape1[] = { 1, 1, 1, 1 }; - std::vector<float> input1({ 2 }); - - std::vector<float> output({ 1, 2, 3, 4, 5, 6, 7, 8}); - - - return DivisionTestHelper<float>(workloadFactory, - shape0, input0, 1.0f, 0, - shape1, input1, 1.0f, 0, - shape0, output, 1.0f, 0); -} - -LayerTestResult<float, 4> DivisionBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory) -{ - unsigned int shape0[] = { 1, 3, 3, 2 }; - std::vector<float> input0({ - 1, 4, 3, 8, 5, 12, - 7, 16, 9, 20, 11, 24, - 13, 28, 15, 32, 17, 36}); - - unsigned int shape1[] = { 1, 1, 1, 2 }; - std::vector<float> input1({ 1, 2 }); - - std::vector<float> output({ - 1, 2, 3, 4, 5, 6, - 7, 8, 9, 10, 11, 12, - 13, 14, 15, 16, 17, 18}); - - return DivisionTestHelper<float>(workloadFactory, - shape0, input0, 1.0f, 0, - shape1, input1, 1.0f, 0, - shape0, output, 1.0f, 0); -} - - -LayerTestResult<uint8_t,4> DivisionUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - const unsigned int width = 2; - const unsigned int height = 2; - const unsigned int channelCount = 2; - const unsigned int batchSize = 2; - - unsigned int shape[] = { batchSize, channelCount, height, width }; - - std::vector<uint8_t> input0({2, 2, 2, 2, 3, 3, 3, 3, - 4, 4, 4, 4, 5, 5, 5, 5 }); - - std::vector<uint8_t> input1({1, 1, 1, 1, 2, 2, 2, 2, - 4, 4, 4, 4, 4, 4, 4, 4 }); - - std::vector<uint8_t> output({8, 8, 8, 8, 6, 6, 6, 6, - 4, 4, 4, 4, 5, 5, 5, 5}); - - - return DivisionTestHelper<uint8_t>(workloadFactory, - shape, input0, 1.0f, 0, - shape, input1, 1.0f, 0, - shape, output, 0.25f, 0); -} - -LayerTestResult<uint8_t, 4> DivisionBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - unsigned int shape0[] = { 1, 2, 2, 2 }; - std::vector<uint8_t> input0({ 2, 4, 6, 8, 10, 12, 14, 16}); - - unsigned int shape1[] = { 1, 1, 1, 1 }; - std::vector<uint8_t> input1({ 2 }); - - std::vector<uint8_t> output({ 1, 2, 3, 4, 5, 6, 7, 8}); - - return DivisionTestHelper<uint8_t>(workloadFactory, - shape0, input0, 1.0f, 0, - shape1, input1, 1.0f, 0, - shape0, output, 1.0f, 0); -} - -LayerTestResult<uint8_t, 4> DivisionBroadcast1DVectorUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - unsigned int shape0[] = { 1, 3, 3, 2 }; - std::vector<uint8_t> input0({1, 4, 3, 8, 5, 12, - 7, 16, 9, 20, 11, 24, - 13, 28, 15, 32, 17, 36}); - - unsigned int shape1[] = { 1, 1, 1, 2 }; - std::vector<uint8_t> input1({ 1, 2 }); - - std::vector<uint8_t> output({1, 2, 3, 4, 5, 6, - 7, 8, 9, 10, 11, 12, - 13, 14, 15, 16, 17, 18}); - - return DivisionTestHelper<uint8_t>(workloadFactory, - shape0, input0, 1.0f, 0, - shape1, input1, 1.0f, 0, - shape0, output, 1.0f, 0); -} - -namespace { -LayerTestResult<float,4> MultiplicationTestHelper(armnn::IWorkloadFactory& workloadFactory, - const unsigned int shape0[4], - const std::vector<float> & values0, - const unsigned int shape1[4], - const std::vector<float> & values1, - const unsigned int outShape[4], - const std::vector<float> & outValues) -{ - const size_t dimensionCount = 4; - armnn::TensorInfo inputTensorInfo0{dimensionCount, shape0, armnn::DataType::Float32}; - armnn::TensorInfo inputTensorInfo1{dimensionCount, shape1, armnn::DataType::Float32}; - armnn::TensorInfo outputTensorInfo{dimensionCount, outShape, armnn::DataType::Float32}; - - auto input0 = MakeTensor<float, 4>(inputTensorInfo0, values0); - auto input1 = MakeTensor<float, 
4>(inputTensorInfo1, values1); - - LayerTestResult<float,4> ret(outputTensorInfo); - - std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0); - std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::MultiplicationQueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get()); - AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info); - - inputHandle0->Allocate(); - inputHandle1->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]); - CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - - ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outValues); - return ret; -} -} // anonymous namespace - - -LayerTestResult<float,4> MultiplicationTest(armnn::IWorkloadFactory& workloadFactory) -{ - const unsigned int width = 2; - const unsigned int height = 2; - const unsigned int channelCount = 2; - const unsigned int batchSize = 2; - - unsigned int shape[] = { batchSize, channelCount, height, width }; - - std::vector<float> input0({ - 1, 1, 1, 1, 2, 2, 2, 2, - 3, 3, 3, 3, 4, 4, 4, 4 }); - - std::vector<float> input1({ - 2, 2, 2, 2, 3, 3, 3, 3, - 4, 4, 4, 4, 5, 5, 5, 5 }); - - std::vector<float> output({ - 2, 2, 2, 2, 6, 6, 6, 6, - 12, 12, 12, 12, 20, 20, 20, 20 }); - - return MultiplicationTestHelper(workloadFactory, - shape, - input0, - shape, - input1, - shape, - output); -} - -LayerTestResult<float, 4> MultiplicationBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory) -{ - unsigned int shape0[] = { 1, 2, 2, 2 }; - std::vector<float> input0({ 1, 2, 3, 4, 5, 6, 7, 8}); - - unsigned int shape1[] = { 1, 1, 1, 1 }; - std::vector<float> input1({ 2 }); - - std::vector<float> output({ 2, 4, 6, 8, 10, 12, 14, 16}); - - return MultiplicationTestHelper(workloadFactory, - shape0, - input0, - shape1, - input1, - shape0, - output); -} - -LayerTestResult<float, 4> MultiplicationBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory) -{ - unsigned int shape0[] = { 1, 3, 3, 2 }; - std::vector<float> input0({ - 1, 2, 3, 4, 5, 6, - 7, 8, 9, 10, 11, 12, - 13, 14, 15, 16, 17, 18}); - - unsigned int shape1[] = { 1, 1, 1, 2 }; - std::vector<float> input1({ 1, 2 }); - - std::vector<float> output({ - 1, 4, 3, 8, 5, 12, - 7, 16, 9, 20, 11, 24, - 13, 28, 15, 32, 17, 36}); - - return MultiplicationTestHelper(workloadFactory, - shape0, - input0, - shape1, - input1, - shape0, - output); -} - -LayerTestResult<float,4> CompareMultiplicationTest(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory) -{ - const unsigned int width = 16; - const unsigned int height = 32; - const unsigned int channelCount = 2; - const unsigned int batchSize = 5; - - armnn::TensorInfo inputTensorInfo0; - armnn::TensorInfo inputTensorInfo1; - armnn::TensorInfo outputTensorInfo; - - constexpr unsigned int shape[] = { batchSize, channelCount, height, width }; - - inputTensorInfo0 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); - 
inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); - outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); - - LayerTestResult<float,4> comparisonResult(outputTensorInfo); - - auto input0 = MakeRandomTensor<float, 4>(inputTensorInfo0, 803506992); - auto input1 = MakeRandomTensor<float, 4>(inputTensorInfo1, 54902257); - - std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0); - std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - std::unique_ptr<armnn::ITensorHandle> inputHandle0Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo0); - std::unique_ptr<armnn::ITensorHandle> inputHandle1Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo1); - std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::MultiplicationQueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get()); - AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - armnn::MultiplicationQueueDescriptor refData = data; - armnn::WorkloadInfo refInfo = info; - SetWorkloadInput(refData, refInfo, 0, inputTensorInfo0, inputHandle0Ref.get()); - SetWorkloadInput(refData, refInfo, 1, inputTensorInfo1, inputHandle1Ref.get()); - SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info); - std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateMultiplication(refData, refInfo); - - inputHandle0->Allocate(); - inputHandle1->Allocate(); - outputHandle->Allocate(); - inputHandle0Ref->Allocate(); - inputHandle1Ref->Allocate(); - outputHandleRef->Allocate(); - - CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]); - CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); - CopyDataToITensorHandle(inputHandle0Ref.get(), &input0[0][0][0][0]); - CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - refWorkloadFactory.Finalize(); - workloadRef->Execute(); - - CopyDataFromITensorHandle(&comparisonResult.output[0][0][0][0], outputHandle.get()); - CopyDataFromITensorHandle(&comparisonResult.outputExpected[0][0][0][0], outputHandleRef.get()); - - return comparisonResult; -} - -LayerTestResult<float,4> CompareBatchNormTest(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory) -{ - const unsigned int width = 2; - const unsigned int height = 3; - const unsigned int channels = 5; - const unsigned int batchSize = 3; - - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - armnn::TensorInfo tensorInfo; - - constexpr unsigned int shape[] = {batchSize, channels, height, width}; - constexpr unsigned int tensorShape[] = {channels}; - - inputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); - outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); - tensorInfo = armnn::TensorInfo(1, tensorShape, armnn::DataType::Float32); - - auto input = MakeRandomTensor<float, 4>(inputTensorInfo, 21312); - - auto mean = MakeRandomTensor<float, 1>(tensorInfo, 123); - auto 
variance = MakeRandomTensor<float, 1>(tensorInfo, 234, 0.0f); - auto beta = MakeRandomTensor<float, 1>(tensorInfo, 123); - auto gamma = MakeRandomTensor<float, 1>(tensorInfo, 345); - - LayerTestResult<float,4> ret(outputTensorInfo); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::BatchNormalizationQueueDescriptor data; - armnn::WorkloadInfo info; - armnn::ScopedCpuTensorHandle meanTensor(tensorInfo); - armnn::ScopedCpuTensorHandle varianceTensor(tensorInfo); - armnn::ScopedCpuTensorHandle betaTensor(tensorInfo); - armnn::ScopedCpuTensorHandle gammaTensor(tensorInfo); - - AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]); - AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]); - AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]); - AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]); - - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - data.m_Mean = &meanTensor; - data.m_Variance = &varianceTensor; - data.m_Beta = &betaTensor; - data.m_Gamma = &gammaTensor; - data.m_Parameters.m_Eps = 0.01f; - - armnn::BatchNormalizationQueueDescriptor refData = data; - armnn::WorkloadInfo refInfo = info; - SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); - SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateBatchNormalization(data, info); - std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateBatchNormalization(refData, refInfo); - - inputHandle->Allocate(); - outputHandle->Allocate(); - inputHandleRef->Allocate(); - outputHandleRef->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - refWorkloadFactory.Finalize(); - workloadRef->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); - - return ret; -} - -template<typename T> -void PermuteTensorData( - armnn::IWorkloadFactory& workloadFactory, - const armnn::PermutationVector& mappings, - armnn::TensorInfo & inputTensorInfo, - const T * inputData, - std::vector<T>& outputData) -{ - BOOST_ASSERT_MSG(inputData != nullptr, "inputData must not be null"); - if (inputData == nullptr) - { - // Nullptr is an error in the test. By returning without doing the concatenation - // I expect the caller to fail the test. It still makes sense to report this as - // an assert for Debug builds. 
- return; - } - - armnn::TensorInfo outputTensorInfo = armnnUtils::Permuted(inputTensorInfo, mappings); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::PermuteQueueDescriptor queueDescriptor; - queueDescriptor.m_Parameters = armnn::PermuteDescriptor{mappings}; - armnn::WorkloadInfo workloadInfo; - AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePermute(queueDescriptor, workloadInfo); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), inputData); - - workload->Execute(); - - outputData.resize(outputTensorInfo.GetNumElements()); - CopyDataFromITensorHandle(&outputData[0], outputHandle.get()); - inputTensorInfo = outputTensorInfo; -} - -armnn::OriginsDescriptor CreateMergerDescriptorForConcatenation( - const std::vector<armnn::TensorInfo> & inputTensorInfos, - unsigned int concatDim) -{ - std::vector<armnn::TensorShape> shapes; - shapes.reserve(inputTensorInfos.size()); - for (const armnn::TensorInfo& it: inputTensorInfos) - { - shapes.push_back(it.GetShape()); - } - - return armnn::CreateMergerDescriptorForConcatenation(shapes.begin(), - shapes.end(), - concatDim); -} - -// -// Concatenation is only supported for N and C dimensions for NCHW. In case of -// <4 dimensions we need to make sure that the concat dimensions are at least -// the 3rd slowest iterating one. -// - -bool NeedPermuteForConcat( - const std::vector<armnn::TensorInfo> & inputTensorInfos, - unsigned int concatDim) -{ - // See note above. Additionally we expect the input shapes to have the - // same number of dimensions. - unsigned int nDimensions = 0; - - // Determine the number of dimensions as well as sanity check them - // agains test implementation issues. - for (auto && tensorInfo : inputTensorInfos) - { - if (!nDimensions) - { - nDimensions = tensorInfo.GetShape().GetNumDimensions(); - } - else - { - BOOST_ASSERT_MSG(nDimensions == tensorInfo.GetShape().GetNumDimensions(), - "Input shapes must have the same number of dimensions"); - } - } - - return (nDimensions-concatDim) < 3; -} - -armnn::TensorShape ExpandTensorShapeTo3dForPermute(const armnn::TensorShape & inputShape) -{ - unsigned int numDims = inputShape.GetNumDimensions(); - if (numDims >= 3) - { - // Nothing to do if the inputShape has at least 3 dimensions. 
- return inputShape; - } - - std::vector<unsigned int> newDims(size_t(3), 1u); - unsigned int expandedBy = 3 - numDims; - for (unsigned int i=0; i<numDims; ++i) - { - newDims[expandedBy+i] = inputShape[i]; - } - return armnn::TensorShape(3u, &newDims[0]); -} - -void Generate3dPermuteVectorForConcat( - unsigned int numDimensions, - unsigned int & concatDim, - std::pair<armnn::PermutationVector, armnn::PermutationVector> & permutations) -{ - BOOST_ASSERT_MSG(numDimensions <= 3, - "Only dimensions 1,2 and 3 are supported by this helper"); - - unsigned int expandedBy = 3 - numDimensions; - unsigned int expandedConcatAxis = concatDim + expandedBy; - - if (expandedConcatAxis == 2) - { - concatDim = 0; - armnn::PermutationVector forwardPermutation({1, 2, 0}); - armnn::PermutationVector reversePermutation({2, 0, 1}); - permutations = std::make_pair(forwardPermutation, reversePermutation); - } - else if (expandedConcatAxis == 1) - { - concatDim = 0; - armnn::PermutationVector forwardPermutation({2, 0, 1}); - armnn::PermutationVector reversePermutation({1, 2, 0}); - permutations = std::make_pair(forwardPermutation, reversePermutation); - } - else - { - BOOST_ASSERT(expandedConcatAxis == 0); - concatDim = 0; - } -} - -// -// Permute the input tensors so we can do a supported concatenation. -// Also treat lower than 3d tensors as 3d by adding dummy 1 dimensions -// at the front. Finally this function tells what the output shape -// of the permuted concatenated tensor is going to be. -// -template <typename T> -void PermuteInputsForConcat( - armnn::IWorkloadFactory& workloadFactory, - std::vector<armnn::TensorInfo> & inputTensorInfos, - std::vector<T *> & inputData, - std::vector<std::vector<T>> & inputDataStorage, - armnn::PermutationVector & permuteVector, - unsigned int & concatDim, - armnn::TensorInfo & outputTensorInfo) -{ - BOOST_ASSERT_MSG(inputTensorInfos.size() > 1, - "Expecting more than one tensor to be concatenated here"); - - unsigned int numDims = 0; - unsigned int nthInput = 0; - const armnn::PermutationVector identity({0, 1, 2}); - - std::pair<armnn::PermutationVector, armnn::PermutationVector> permutations = - std::make_pair(identity, identity); - - inputDataStorage.resize(inputData.size()); - - for (auto && tensorInfo : inputTensorInfos) - { - if (numDims == 0) - { - numDims = tensorInfo.GetShape().GetNumDimensions(); - Generate3dPermuteVectorForConcat(numDims, concatDim, permutations); - // Store the reverese permutation. - permuteVector = permutations.second; - BOOST_ASSERT_MSG(!permuteVector.IsEqual(identity), - "Test logic error, we don't need permutation, so we shouldn't arrive here"); - } - else - { - BOOST_ASSERT_MSG(numDims == tensorInfo.GetShape().GetNumDimensions(), - "All inputs must have the same number of dimensions"); - } - - armnn::TensorInfo newTensorInfo = tensorInfo; - newTensorInfo.SetShape(ExpandTensorShapeTo3dForPermute(tensorInfo.GetShape())); - - PermuteTensorData<T>(workloadFactory, - permutations.first, - newTensorInfo, - inputData[nthInput], - inputDataStorage[nthInput]); - - inputData[nthInput] = inputDataStorage[nthInput].data(); - inputTensorInfos[nthInput] = newTensorInfo; - - ++nthInput; - } - - outputTensorInfo.SetShape( - armnnUtils::Permuted( - ExpandTensorShapeTo3dForPermute(outputTensorInfo.GetShape()), - permutations.first)); -} - - -// -// This is the pair of PermuteInputsForConcat(...) which permutes back -// the output of the concatenation so we can check it against an expected -// output. 
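// Editorial sketch (not part of the deleted file): the concat helpers above and below rely on the
// forward/reverse vectors produced by Generate3dPermuteVectorForConcat being exact inverses, so that
// permuting the inputs, concatenating along dimension 0, and permuting the result back restores the
// caller's original layout. A minimal standalone illustration, assuming ArmNN's convention that
// mappings[i] gives the destination index of source dimension i (the Permute3 helper below is
// hypothetical, written only for this sketch):

#include <array>
#include <cassert>

std::array<unsigned int, 3> Permute3(const std::array<unsigned int, 3>& shape,
                                     const std::array<unsigned int, 3>& mappings)
{
    std::array<unsigned int, 3> permuted{};
    for (unsigned int i = 0; i < 3; ++i)
    {
        permuted[mappings[i]] = shape[i]; // source dimension i lands at index mappings[i]
    }
    return permuted;
}

int main()
{
    const std::array<unsigned int, 3> shape   {{ 2, 3, 5 }}; // concatenation requested on axis 2
    const std::array<unsigned int, 3> forward {{ 1, 2, 0 }}; // pair returned for that case above
    const std::array<unsigned int, 3> reverse {{ 2, 0, 1 }};

    const auto permuted = Permute3(shape, forward);    // { 5, 2, 3 }: the concat axis is now axis 0
    const auto restored = Permute3(permuted, reverse); // { 2, 3, 5 }: original layout recovered
    assert(restored == shape);
    return 0;
}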
-// -template <typename T> -void PermuteOutputForConcat( - armnn::IWorkloadFactory& workloadFactory, - const armnn::TensorInfo & tensorInfo, - const armnn::PermutationVector & permuteVector, - std::unique_ptr<armnn::ITensorHandle> && inputDataHandle, - T * data) -{ - BOOST_ASSERT_MSG(data != nullptr, "data must not be null"); - if (data == nullptr) - { - // Nullptr is an error in the test. By returning without doing the permutation - // I expect the caller to fail the test. It still makes sense to report this as - // an assert for Debug builds. - return; - } - - armnn::TensorInfo resultTensorInfo = tensorInfo; - std::vector<T> inputData(tensorInfo.GetNumElements()); - std::vector<T> outputData; - - CopyDataFromITensorHandle(&inputData[0], inputDataHandle.get()); - - PermuteTensorData<T>(workloadFactory, - permuteVector, - resultTensorInfo, - &inputData[0], - outputData); - - ::memcpy(data, &outputData[0], sizeof(T)*outputData.size()); -} - -template <typename T> -void Concatenate(armnn::IWorkloadFactory& workloadFactory, - std::initializer_list<const armnn::TensorInfo> inputTensorInfosOrig, - std::initializer_list<T *> inputsOrig, - const armnn::TensorInfo& outputTensorInfoOrig, - T * output, - unsigned int concatDim) -{ - BOOST_ASSERT_MSG(output != nullptr, "output must not be null"); - if (output == nullptr) - { - // Nullptr is an error in the test. By returning without doing the permutation - // I expect the caller to fail the test. It still makes sense to report this as - // an assert for Debug builds. - return; - } - - armnn::MergerQueueDescriptor queueDescriptor; - - // Saves a copy of the parameters which we might need to change. - std::vector<armnn::TensorInfo> inputTensorInfos(inputTensorInfosOrig.begin(), inputTensorInfosOrig.end()); - std::vector<T *> inputs = inputsOrig; - armnn::TensorInfo outputTensorInfo = outputTensorInfoOrig; - - armnn::PermutationVector permuteVector{0, 1, 2}; - - // Holds and automatically releases memory for the reshaped input data. - std::vector<std::vector<T>> tmpInputDataStorage; - - const size_t inputCount = inputTensorInfos.size(); - - bool needPermuteForConcat = NeedPermuteForConcat(inputTensorInfos, concatDim); - - if (needPermuteForConcat) - { - // - // We need to permute the inputs, because concatenation along - // the requested axis is not supported. - // - PermuteInputsForConcat<T>(workloadFactory, - inputTensorInfos, - inputs, - tmpInputDataStorage, - permuteVector, - concatDim, - outputTensorInfo); - } - - armnn::OriginsDescriptor viewsDescriptor = CreateMergerDescriptorForConcatenation(inputTensorInfos, concatDim); - - queueDescriptor.m_ViewOrigins.reserve(viewsDescriptor.GetNumViews()); - for (unsigned int i = 0; i < viewsDescriptor.GetNumViews(); ++i) - { - queueDescriptor.m_ViewOrigins.emplace_back(std::vector<unsigned int>(viewsDescriptor.GetViewOrigin(i), - viewsDescriptor.GetViewOrigin(i) + viewsDescriptor.GetNumDimensions())); - } - - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - std::vector<std::unique_ptr<armnn::ITensorHandle>> inputHandles; - inputHandles.reserve(inputCount); - - const bool subTensorsSupported = workloadFactory.SupportsSubTensors(); - for (unsigned int i = 0; i < inputCount; ++i) - { - const armnn::TensorInfo& inputTensorInfo = inputTensorInfos[i]; - - std::unique_ptr<armnn::ITensorHandle> inputHandle = subTensorsSupported ? 
- workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo.GetShape(), - queueDescriptor.m_ViewOrigins[i].m_Origin.data()) - : workloadFactory.CreateTensorHandle(inputTensorInfo); - - inputHandles.emplace_back(std::move(inputHandle)); - } - - armnn::WorkloadInfo workloadInfo; - - for (unsigned int i = 0; i < inputCount; ++i) - { - AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfos[i], inputHandles[i].get()); - } - - AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(queueDescriptor, workloadInfo); - - for (auto& inputHandle : inputHandles) - { - inputHandle->Allocate(); - } - - outputHandle->Allocate(); - - unsigned int nextInputId = 0; - for (auto& inputHandle : inputHandles) - { - CopyDataToITensorHandle(inputHandle.get(), inputs[nextInputId]); - ++nextInputId; - } - - workloadFactory.Finalize(); - workload->Execute(); - - if (needPermuteForConcat) - { - PermuteOutputForConcat<T>(workloadFactory, - outputTensorInfo, - permuteVector, - std::move(outputHandle), - output); - } - else - { - CopyDataFromITensorHandle(output, outputHandle.get()); - } -} - -template <typename T> -LayerTestResult<T, 1> Concatenation1dTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, int32_t qOffset) -{ - armnn::TensorInfo inputTensorInfo({ 3 }, armnn::GetDataType<T>()); - - auto input0 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 1.0f, 2.0f, 3.0f })); - auto input1 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 4.0f, 5.0f, 6.0f })); - auto input2 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 7.0f, 8.0f, 9.0f })); - - armnn::TensorInfo outputTensorInfo({ 9 }, armnn::GetDataType<T>()); - - LayerTestResult<T, 1> result(outputTensorInfo); - - std::vector<T> output; - output.resize(outputTensorInfo.GetNumElements()); - Concatenate<T>(workloadFactory, - { inputTensorInfo, inputTensorInfo, inputTensorInfo }, - { input0.data(), input1.data(), input2.data() }, - outputTensorInfo, - output.data(), - 0); - - result.output = MakeTensor<T, 1>(outputTensorInfo, output); - result.outputExpected = MakeTensor<T, 1>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { - 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f - })); - - return result; -} - -LayerTestResult<float, 1> Concatenation1dTest(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation1dTestImpl<float>(workloadFactory, 0.0f, 0); -} - -template <typename T> -LayerTestResult<T, 2> Concatenation2dTestImpl(armnn::IWorkloadFactory& workloadFactory, - const armnn::TensorInfo& outputTensorInfo, - unsigned int dimension, - const float qScale, - const int32_t qOffset) -{ - armnn::TensorInfo inputTensorInfo({ 2, 3 }, armnn::GetDataType<T>()); - - auto input0 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0 - 1.0f, 2.0f, 3.0f, - - // Batch 1 - 10.0f, 11.0f, 12.0f, - })); - - auto input1 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0 - 4.0f, 5.0f, 6.0f, - - // Batch 1 - 13.0f, 14.0f, 15.0f, - })); - - auto input2 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0 - 7.0f, 8.0f, 9.0f, - - // Batch 1 - 16.0f, 17.0f, 18.0f, - })); - - LayerTestResult<T, 2> result(outputTensorInfo); - - std::vector<T> output; - output.resize(outputTensorInfo.GetNumElements()); - Concatenate<T>(workloadFactory, - { 
inputTensorInfo, inputTensorInfo, inputTensorInfo }, - { input0.data(), input1.data(), input2.data() }, - outputTensorInfo, - output.data(), - dimension); - - result.output = MakeTensor<T, 2>(outputTensorInfo, output); - return result; -} - -template <typename T> -LayerTestResult<T, 2> Concatenation2dDim0TestImpl(armnn::IWorkloadFactory& workloadFactory, - float qScale, int32_t qOffset) -{ - armnn::TensorInfo outputTensorInfo({ 6, 3 }, armnn::GetDataType<T>()); - - LayerTestResult<T, 2> result = Concatenation2dTestImpl<T>(workloadFactory, outputTensorInfo, 0, qScale, qOffset); - result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0 - 1.0f, 2.0f, 3.0f, - - // Batch 1 - 10.0f, 11.0f, 12.0f, - - // Batch 2 - 4.0f, 5.0f, 6.0f, - - // Batch 3 - 13.0f, 14.0f, 15.0f, - - // Batch 4 - 7.0f, 8.0f, 9.0f, - - // Batch 5 - 16.0f, 17.0f, 18.0f, - })); - - return result; -} - -LayerTestResult<float, 2> Concatenation2dDim0Test(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation2dDim0TestImpl<float>(workloadFactory, 0.0f, 0); -} - -template <typename T> -LayerTestResult<T, 2> Concatenation2dDim1TestImpl(armnn::IWorkloadFactory& workloadFactory, - float qScale, int32_t qOffset) -{ - armnn::TensorInfo outputTensorInfo({ 2, 9 }, armnn::GetDataType<T>()); - - LayerTestResult<T, 2> result = Concatenation2dTestImpl<T>(workloadFactory, outputTensorInfo, 1, qScale, qOffset); - result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0 - 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, - - // Batch 1 - 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f - })); - - return result; -} - -LayerTestResult<float, 2> Concatenation2dDim1Test(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation2dDim1TestImpl<float>(workloadFactory, 0.0f, 0); -} - -template <typename T> -LayerTestResult<T, 2> Concatenation2dDim0DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, - int32_t qOffset) -{ - armnn::TensorInfo input0TensorInfo({ 2, 3 }, armnn::GetDataType<T>()); - auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0 - 1.0f, 2.0f, 3.0f, - - // Batch 1 - 10.0f, 11.0f, 12.0f, - })); - - armnn::TensorInfo input1TensorInfo({ 3, 3 }, armnn::GetDataType<T>()); - auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0 - 4.0f, 5.0f, 6.0f, - - // Batch 1 - 13.0f, 14.0f, 15.0f, - - // Batch 0 - 7.0f, 8.0f, 9.0f, - })); - - armnn::TensorInfo input2TensorInfo({ 1, 3 }, armnn::GetDataType<T>()); - auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 1 - 16.0f, 17.0f, 18.0f, - })); - - armnn::TensorInfo outputTensorInfo({ 6, 3 }, armnn::GetDataType<T>()); - LayerTestResult<T, 2> result(outputTensorInfo); - - std::vector<T> output; - output.resize(outputTensorInfo.GetNumElements()); - Concatenate<T>(workloadFactory, - { input0TensorInfo, input1TensorInfo, input2TensorInfo }, - { input0.data(), input1.data(), input2.data() }, - outputTensorInfo, - output.data(), - 0); - - result.output = MakeTensor<T, 2>(outputTensorInfo, output); - result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0 - 1.0f, 2.0f, 3.0f, - - // Batch 1 - 10.0f, 11.0f, 12.0f, - - // Batch 2 - 4.0f, 5.0f, 6.0f, - - // Batch 3 - 13.0f, 14.0f, 15.0f, - - // Batch 4 - 7.0f, 8.0f, 9.0f, - - // Batch 5 - 16.0f, 17.0f, 
18.0f, - })); - - return result; -} - -LayerTestResult<float, 2> Concatenation2dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation2dDim0DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0); -} - -template <typename T> -LayerTestResult<T, 2> Concatenation2dDim1DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, - int32_t qOffset) -{ - armnn::TensorInfo input0TensorInfo({ 2, 3 }, armnn::GetDataType<T>()); - auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0 - 1.0f, 2.0f, 3.0f, - - // Batch 1 - 10.0f, 11.0f, 12.0f, - })); - - armnn::TensorInfo input1TensorInfo({ 2, 5 }, armnn::GetDataType<T>()); - auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0 - 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, - - // Batch 1 - 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, - })); - - armnn::TensorInfo input2TensorInfo({ 2, 1 }, armnn::GetDataType<T>()); - auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0 - 9.0f, - - // Batch 1 - 18.0f - })); - - armnn::TensorInfo outputTensorInfo({ 2, 9 }, armnn::GetDataType<T>()); - LayerTestResult<T, 2> result(outputTensorInfo); - - std::vector<T> output; - output.resize(outputTensorInfo.GetNumElements()); - Concatenate<T>(workloadFactory, - { input0TensorInfo, input1TensorInfo, input2TensorInfo }, - { input0.data(), input1.data(), input2.data() }, - outputTensorInfo, - output.data(), - 1); - - result.output = MakeTensor<T, 2>(outputTensorInfo, output); - result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0 - 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, - - // Batch 1 - 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, - })); - - return result; -} - -LayerTestResult<float, 2> Concatenation2dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation2dDim1DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0); -} - -template <typename T> -LayerTestResult<T, 3> Concatenation3dTestImpl(armnn::IWorkloadFactory& workloadFactory, - const armnn::TensorInfo& outputTensorInfo, - unsigned int dimension, - float qScale, - int32_t qOffset) -{ - armnn::TensorInfo inputTensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>()); - - auto input0 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0, Channel 0 - 1.0f, 2.0f, - - // Batch 0, Channel 1 - 3.0f, 4.0f, - - // Batch 0, Channel 2 - 5.0f, 6.0f, - - // Batch 1, Channel 0 - 19.0f, 20.0f, - - // Batch 1, Channel 1 - 21.0f, 22.0f, - - // Batch 1, Channel 2 - 23.0f, 24.0f - })); - - auto input1 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0, Channel 0 - 7.0f, 8.0f, - - // Batch 0, Channel 1 - 9.0f, 10.0f, - - // Batch 0, Channel 2 - 11.0f, 12.0f, - - // Batch 1, Channel 0 - 25.0f, 26.0f, - - // Batch 1, Channel 1 - 27.0f, 28.0f, - - // Batch 1, Channel 2 - 29.0f, 30.0f - })); - - auto input2 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0, Channel 0 - 13.0f, 14.0f, - - // Batch 0, Channel 1 - 15.0f, 16.0f, - - // Batch 0, Channel 2 - 17.0f, 18.0f, - - // Batch 1, Channel 0 - 31.0f, 32.0f, - - // Batch 1, Channel 1 - 33.0f, 34.0f, - - // Batch 1, Channel 2 - 35.0f, 36.0f - })); - - LayerTestResult<T, 3> result(outputTensorInfo); - - std::vector<T> output; - output.resize(outputTensorInfo.GetNumElements()); - Concatenate<T>(workloadFactory, - { 
inputTensorInfo, inputTensorInfo, inputTensorInfo }, - { input0.data(), input1.data(), input2.data() }, - outputTensorInfo, - output.data(), - dimension); - - result.output = MakeTensor<T, 3>(outputTensorInfo, output); - return result; -} - -template <typename T> -LayerTestResult<T, 3> Concatenation3dDim0TestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, - int32_t qOffset) -{ - armnn::TensorInfo outputTensorInfo({ 6, 3, 2 }, armnn::GetDataType<T>()); - - LayerTestResult<T, 3> result = Concatenation3dTestImpl<T>(workloadFactory, outputTensorInfo, 0, - qScale, qOffset); - result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0, Channel 0 - 1.0f, 2.0f, - - // Batch 0, Channel 1 - 3.0f, 4.0f, - - // Batch 0, Channel 2 - 5.0f, 6.0f, - - // Batch 1, Channel 0 - 19.0f, 20.0f, - - // Batch 1, Channel 1 - 21.0f, 22.0f, - - // Batch 1, Channel 2 - 23.0f, 24.0f, - - // Batch 2, Channel 0 - 7.0f, 8.0f, - - // Batch 2, Channel 1 - 9.0f, 10.0f, - - // Batch 2, Channel 2 - 11.0f, 12.0f, - - // Batch 3, Channel 0 - 25.0f, 26.0f, - - // Batch 3, Channel 1 - 27.0f, 28.0f, - - // Batch 3, Channel 2 - 29.0f, 30.0f, - - // Batch 4, Channel 0 - 13.0f, 14.0f, - - // Batch 4, Channel 1 - 15.0f, 16.0f, - - // Batch 4, Channel 2 - 17.0f, 18.0f, - - // Batch 5, Channel 0 - 31.0f, 32.0f, - - // Batch 5, Channel 1 - 33.0f, 34.0f, - - // Batch 5, Channel 2 - 35.0f, 36.0f - })); - return result; -} - -LayerTestResult<float, 3> Concatenation3dDim0Test(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation3dDim0TestImpl<float>(workloadFactory, 0.0f, 0); -} - -template <typename T> -LayerTestResult<T, 3> Concatenation3dDim1TestImpl(armnn::IWorkloadFactory& workloadFactory, - float qScale, int32_t qOffset) -{ - armnn::TensorInfo outputTensorInfo({ 2, 9, 2 }, armnn::GetDataType<T>()); - - LayerTestResult<T, 3> result = Concatenation3dTestImpl<T>(workloadFactory, outputTensorInfo, 1, qScale, qOffset); - result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0, Channel 0 - 1.0f, 2.0f, - - // Batch 0, Channel 1 - 3.0f, 4.0f, - - // Batch 0, Channel 2 - 5.0f, 6.0f, - - // Batch 0, Channel 3 - 7.0f, 8.0f, - - // Batch 0, Channel 4 - 9.0f, 10.0f, - - // Batch 0, Channel 5 - 11.0f, 12.0f, - - // Batch 0, Channel 6 - 13.0f, 14.0f, - - // Batch 0, Channel 7 - 15.0f, 16.0f, - - // Batch 0, Channel 8 - 17.0f, 18.0f, - - // Batch 1, Channel 0 - 19.0f, 20.0f, - - // Batch 1, Channel 1 - 21.0f, 22.0f, - - // Batch 1, Channel 2 - 23.0f, 24.0f, - - // Batch 1, Channel 3 - 25.0f, 26.0f, - - // Batch 1, Channel 4 - 27.0f, 28.0f, - - // Batch 1, Channel 5 - 29.0f, 30.0f, - - // Batch 1, Channel 6 - 31.0f, 32.0f, - - // Batch 1, Channel 7 - 33.0f, 34.0f, - - // Batch 1, Channel 8 - 35.0f, 36.0f - })); - - return result; -} - -LayerTestResult<float, 3> Concatenation3dDim1Test(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation3dDim1TestImpl<float>(workloadFactory, 0.0f, 0); -} - -template <typename T> -LayerTestResult<T, 3> Concatenation3dDim2TestImpl(armnn::IWorkloadFactory& workloadFactory, - float qScale, int32_t qOffset) -{ - armnn::TensorInfo outputTensorInfo({ 2, 3, 6 }, armnn::GetDataType<T>()); - - LayerTestResult<T, 3> result = Concatenation3dTestImpl<T>(workloadFactory, outputTensorInfo, 2, qScale, qOffset); - result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0, Channel 0 - 1.0f, 2.0f, 7.0f, 8.0f, 13.0f, 14.0f, - - // Batch 0, 
Channel 1 - 3.0f, 4.0f, 9.0f, 10.0f, 15.0f, 16.0f, - - // Batch 0, Channel 2 - 5.0f, 6.0f, 11.0f, 12.0f, 17.0f, 18.0f, - - // Batch 1, Channel 0 - 19.0f, 20.0f, 25.0f, 26.0f, 31.0f, 32.0f, - - // Batch 1, Channel 1 - 21.0f, 22.0f, 27.0f, 28.0f, 33.0f, 34.0f, - - // Batch 1, Channel 2 - 23.0f, 24.0f, 29.0f, 30.0f, 35.0f, 36.0f, - })); - - return result; -} - -LayerTestResult<float, 3> Concatenation3dDim2Test(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation3dDim2TestImpl<float>(workloadFactory, 0.0f, 0); -} - -template <typename T> -LayerTestResult<T, 3> Concatenation3dDim0DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, - int32_t qOffset) -{ - armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>()); - auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0, Channel 0 - 1.0f, 2.0f, - - // Batch 0, Channel 1 - 3.0f, 4.0f, - - // Batch 0, Channel 2 - 5.0f, 6.0f, - - // Batch 1, Channel 0 - 19.0f, 20.0f, - - // Batch 1, Channel 1 - 21.0f, 22.0f, - - // Batch 1, Channel 2 - 23.0f, 24.0f - })); - - armnn::TensorInfo input1TensorInfo({ 1, 3, 2 }, armnn::GetDataType<T>()); - auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0, Channel 0 - 7.0f, 8.0f, - - // Batch 0, Channel 1 - 9.0f, 10.0f, - - // Batch 0, Channel 2 - 11.0f, 12.0f, - })); - - armnn::TensorInfo input2TensorInfo({ 3, 3, 2 }, armnn::GetDataType<T>()); - auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0, Channel 0 - 25.0f, 26.0f, - - // Batch 0, Channel 1 - 27.0f, 28.0f, - - // Batch 0, Channel 2 - 29.0f, 30.0f, - - // Batch 1, Channel 0 - 13.0f, 14.0f, - - // Batch 1, Channel 1 - 15.0f, 16.0f, - - // Batch 1, Channel 2 - 17.0f, 18.0f, - - // Batch 2, Channel 0 - 31.0f, 32.0f, - - // Batch 2, Channel 1 - 33.0f, 34.0f, - - // Batch 2, Channel 2 - 35.0f, 36.0f - })); - - armnn::TensorInfo outputTensorInfo({ 6, 3, 2 }, armnn::GetDataType<T>()); - LayerTestResult<T, 3> result(outputTensorInfo); - - std::vector<T> output; - output.resize(outputTensorInfo.GetNumElements()); - Concatenate<T>(workloadFactory, - { input0TensorInfo, input1TensorInfo, input2TensorInfo }, - { input0.data(), input1.data(), input2.data() }, - outputTensorInfo, - output.data(), - 0); - - result.output = MakeTensor<T, 3>(outputTensorInfo, output); - result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0, Channel 0 - 1.0f, 2.0f, - - // Batch 0, Channel 1 - 3.0f, 4.0f, - - // Batch 0, Channel 2 - 5.0f, 6.0f, - - // Batch 1, Channel 0 - 19.0f, 20.0f, - - // Batch 1, Channel 1 - 21.0f, 22.0f, - - // Batch 1, Channel 2 - 23.0f, 24.0f, - - // Batch 2, Channel 0 - 7.0f, 8.0f, - - // Batch 2, Channel 1 - 9.0f, 10.0f, - - // Batch 2, Channel 2 - 11.0f, 12.0f, - - // Batch 3, Channel 0 - 25.0f, 26.0f, - - // Batch 3, Channel 1 - 27.0f, 28.0f, - - // Batch 3, Channel 2 - 29.0f, 30.0f, - - // Batch 4, Channel 0 - 13.0f, 14.0f, - - // Batch 4, Channel 1 - 15.0f, 16.0f, - - // Batch 4, Channel 2 - 17.0f, 18.0f, - - // Batch 5, Channel 0 - 31.0f, 32.0f, - - // Batch 5, Channel 1 - 33.0f, 34.0f, - - // Batch 5, Channel 2 - 35.0f, 36.0f - })); - - return result; -} - -LayerTestResult<float, 3> Concatenation3dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation3dDim0DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0); -} - -template <typename T> -LayerTestResult<T, 3> 
Concatenation3dDim1DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, - int32_t qOffset) -{ - armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>()); - auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0, Channel 0 - 1.0f, 2.0f, - - // Batch 0, Channel 1 - 3.0f, 4.0f, - - // Batch 0, Channel 2 - 5.0f, 6.0f, - - // Batch 1, Channel 0 - 19.0f, 20.0f, - - // Batch 1, Channel 1 - 21.0f, 22.0f, - - // Batch 1, Channel 2 - 23.0f, 24.0f - })); - - armnn::TensorInfo input1TensorInfo({ 2, 4, 2 }, armnn::GetDataType<T>()); - auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0, Channel 0 - 7.0f, 8.0f, - - // Batch 0, Channel 1 - 9.0f, 10.0f, - - // Batch 0, Channel 2 - 11.0f, 12.0f, - - // Batch 0, Channel 3 - 25.0f, 26.0f, - - // Batch 1, Channel 0 - 27.0f, 28.0f, - - // Batch 1, Channel 1 - 29.0f, 30.0f, - - // Batch 1, Channel 2 - 13.0f, 14.0f, - - // Batch 1, Channel 3 - 15.0f, 16.0f, - })); - - armnn::TensorInfo input2TensorInfo({ 2, 1, 2 }, armnn::GetDataType<T>()); - auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0, Channel 0 - 17.0f, 18.0f, - - // Batch 1, Channel 0 - 31.0f, 32.0f, - })); - - armnn::TensorInfo outputTensorInfo({ 2, 8, 2 }, armnn::GetDataType<T>()); - LayerTestResult<T, 3> result(outputTensorInfo); - - std::vector<T> output; - output.resize(outputTensorInfo.GetNumElements()); - Concatenate<T>(workloadFactory, - { input0TensorInfo, input1TensorInfo, input2TensorInfo }, - { input0.data(), input1.data(), input2.data() }, - outputTensorInfo, - output.data(), - 1); - - result.output = MakeTensor<T, 3>(outputTensorInfo, output); - result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0, Channel 0 - 1.0f, 2.0f, - - // Batch 0, Channel 1 - 3.0f, 4.0f, - - // Batch 0, Channel 2 - 5.0f, 6.0f, - - // Batch 0, Channel 3 - 7.0f, 8.0f, - - // Batch 0, Channel 4 - 9.0f, 10.0f, - - // Batch 0, Channel 5 - 11.0f, 12.0f, - - // Batch 0, Channel 6 - 25.0f, 26.0f, - - // Batch 0, Channel 7 - 17.0f, 18.0f, - - // Batch 1, Channel 0 - 19.0f, 20.0f, - - // Batch 1, Channel 1 - 21.0f, 22.0f, - - // Batch 1, Channel 2 - 23.0f, 24.0f, - - // Batch 1, Channel 3 - 27.0f, 28.0f, - - // Batch 1, Channel 4 - 29.0f, 30.0f, - - // Batch 1, Channel 5 - 13.0f, 14.0f, - - // Batch 1, Channel 6 - 15.0f, 16.0f, - - // Batch 1, Channel 7 - 31.0f, 32.0f, - })); - - return result; -} - -LayerTestResult<float, 3> Concatenation3dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation3dDim1DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0); -} - -template <typename T> -LayerTestResult<T, 3> Concatenation3dDim2DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, - int32_t qOffset) -{ - armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>()); - auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0, Channel 0 - 1.0f, 2.0f, - - // Batch 0, Channel 1 - 3.0f, 4.0f, - - // Batch 0, Channel 2 - 5.0f, 6.0f, - - // Batch 1, Channel 0 - 19.0f, 20.0f, - - // Batch 1, Channel 1 - 21.0f, 22.0f, - - // Batch 1, Channel 2 - 23.0f, 24.0f - })); - - armnn::TensorInfo input1TensorInfo({ 2, 3, 1 }, armnn::GetDataType<T>()); - auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0, Channel 0 - 7.0f, - - // Batch 0, Channel 1 - 
9.0f, - - // Batch 0, Channel 2 - 11.0f, - - // Batch 1, Channel 0 - 25.0f, - - // Batch 1, Channel 1 - 27.0f, - - // Batch 1, Channel 2 - 29.0f - })); - - armnn::TensorInfo input2TensorInfo({ 2, 3, 3 }, armnn::GetDataType<T>()); - auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0, Channel 0 - 13.0f, 14.0f, 50.0f, - - // Batch 0, Channel 1 - 15.0f, 16.0f, 51.0f, - - // Batch 0, Channel 2 - 17.0f, 18.0f, 52.0f, - - // Batch 1, Channel 0 - 31.0f, 32.0f, 53.0f, - - // Batch 1, Channel 1 - 33.0f, 34.0f, 54.0f, - - // Batch 1, Channel 2 - 35.0f, 36.0f, 55.0f, - })); - - armnn::TensorInfo outputTensorInfo({ 2, 3, 6 }, armnn::GetDataType<T>()); - LayerTestResult<T, 3> result(outputTensorInfo); - - std::vector<T> output; - output.resize(outputTensorInfo.GetNumElements()); - Concatenate<T>(workloadFactory, - { input0TensorInfo, input1TensorInfo, input2TensorInfo }, - { input0.data(), input1.data(), input2.data() }, - outputTensorInfo, - output.data(), - 2); - - result.output = MakeTensor<T, 3>(outputTensorInfo, output); - result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { - // Batch 0, Channel 0 - 1.0f, 2.0f, 7.0f, 13.0f, 14.0f, 50.0f, - - // Batch 0, Channel 1 - 3.0f, 4.0f, 9.0f, 15.0f, 16.0f, 51.0f, - - // Batch 0, Channel 2 - 5.0f, 6.0f, 11.0f, 17.0f, 18.0f, 52.0f, - - // Batch 1, Channel 0 - 19.0f, 20.0f, 25.0f, 31.0f, 32.0f, 53.0f, - - // Batch 1, Channel 1 - 21.0f, 22.0f, 27.0f, 33.0f, 34.0f, 54.0f, - - // Batch 1, Channel 2 - 23.0f, 24.0f, 29.0f, 35.0f, 36.0f, 55.0f, - })); - - return result; -} - -LayerTestResult<float, 3> Concatenation3dDim2DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation3dDim2DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0); -} - -LayerTestResult<float, 4> ResizeBilinearNopTest(armnn::IWorkloadFactory& workloadFactory) -{ - constexpr unsigned int inputWidth = 4; - constexpr unsigned int inputHeight = 4; - constexpr unsigned int inputChannels = 1; - constexpr unsigned int inputBatchSize = 1; - - constexpr unsigned int outputWidth = inputWidth; - constexpr unsigned int outputHeight = inputHeight; - constexpr unsigned int outputChannels = inputChannels; - constexpr unsigned int outputBatchSize = inputBatchSize; - - const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, - armnn::DataType::Float32); - const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, - armnn::DataType::Float32); - - auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ - 1.0f, 2.0f, 3.0f, 4.0f, - 2.0f, 3.0f, 4.0f, 5.0f, - 3.0f, 4.0f, 5.0f, 6.0f, - 4.0f, 5.0f, 6.0f, 7.0f - })); - - LayerTestResult<float, 4> result(outputTensorInfo); - result.outputExpected = input; - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::ResizeBilinearQueueDescriptor descriptor; - armnn::WorkloadInfo info; - AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - 
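// Editorial note (not part of the deleted file): the ResizeBilinear tests in this group share one
// coordinate mapping, so a single worked value may help. The expected tensors are consistent with
// each output texel projecting its top-left corner into the input using
// scaleX = inputWidth / outputWidth and scaleY = inputHeight / outputHeight, then interpolating
// between the surrounding input texels. For ResizeBilinearMinTest below (5x3 -> 3x2, so
// scaleX = 5/3 and scaleY = 3/2), output (y=0, x=1) samples srcX = 1 * 5/3 = 1.667, giving
// (1 - 0.667) * 2.0f + 0.667 * 3.0f = 2.6666f, and output (y=1, x=0) samples srcY = 1.5, giving
// 0.5 * 13.0f + 0.5 * 144.0f = 78.5f, matching the expected values used in that test.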
workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - return result; -} - -LayerTestResult<float, 4> SimpleResizeBilinearTest(armnn::IWorkloadFactory& workloadFactory) -{ - constexpr unsigned int inputWidth = 2; - constexpr unsigned int inputHeight = 2; - constexpr unsigned int inputChannels = 1; - constexpr unsigned int inputBatchSize = 1; - - constexpr unsigned int outputWidth = inputWidth / 2; - constexpr unsigned int outputHeight = inputHeight / 2; - constexpr unsigned int outputChannels = inputChannels; - constexpr unsigned int outputBatchSize = inputBatchSize; - - const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, - armnn::DataType::Float32); - const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, - armnn::DataType::Float32); - - auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ - 1.0f, 255.0f, - 200.0f, 250.f, - })); - - // The 'resize bilinear' operation projects the top-left corner of output texels into the input image, - // then figures out the interpolants and weights. Note this is different to projecting the centre of the - // output texel - and thus we'll expect the output 1x1 matrix to contain, as its single element, the value - // that was at position (0,0) of the input matrix (rather than an average, which we would expect if projecting - // the centre). - LayerTestResult<float, 4> result(outputTensorInfo); - result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({ - 1.0f - })); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::ResizeBilinearQueueDescriptor descriptor; - armnn::WorkloadInfo info; - AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - return result; -} - -LayerTestResult<float, 4> ResizeBilinearSqMinTest(armnn::IWorkloadFactory& workloadFactory) -{ - constexpr unsigned int inputWidth = 4; - constexpr unsigned int inputHeight = 4; - constexpr unsigned int inputChannels = 1; - constexpr unsigned int inputBatchSize = 1; - - constexpr unsigned int outputWidth = inputWidth / 2; - constexpr unsigned int outputHeight = inputHeight / 2; - constexpr unsigned int outputChannels = inputChannels; - constexpr unsigned int outputBatchSize = inputBatchSize; - - const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, - armnn::DataType::Float32); - const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, - armnn::DataType::Float32); - - auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ - 1.0f, 2.0f, 3.0f, 4.0f, - 2.0f, 3.0f, 4.0f, 5.0f, - 3.0f, 4.0f, 5.0f, 6.0f, - 4.0f, 5.0f, 6.0f, 7.0f - })); - - LayerTestResult<float, 4> result(outputTensorInfo); - result.outputExpected = MakeTensor<float, 
4>(outputTensorInfo, std::vector<float>({ - 1.f, 3.f, - 3.f, 5.f - })); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::ResizeBilinearQueueDescriptor descriptor; - armnn::WorkloadInfo info; - AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - return result; -} - -LayerTestResult<float, 4> ResizeBilinearMinTest(armnn::IWorkloadFactory& workloadFactory) -{ - constexpr unsigned int inputWidth = 5; - constexpr unsigned int inputHeight = 3; - constexpr unsigned int inputChannels = 1; - constexpr unsigned int inputBatchSize = 1; - - constexpr unsigned int outputWidth = 3; - constexpr unsigned int outputHeight = 2; - constexpr unsigned int outputChannels = inputChannels; - constexpr unsigned int outputBatchSize = inputBatchSize; - - const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, - armnn::DataType::Float32); - const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, - armnn::DataType::Float32); - - auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ - 1.0f, 2.0f, 3.0f, 5.0f, 8.0f, - 13.0f, 21.0f, 34.0f, 55.0f, 89.0f, - 144.0f, 233.0f, 377.0f, 610.0f, 987.0f - })); - - LayerTestResult<float, 4> result(outputTensorInfo); - result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({ - 1.0f, 2.6666f, 6.0f, - 78.5f, 179.3333f, 401.f - })); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::ResizeBilinearQueueDescriptor descriptor; - armnn::WorkloadInfo info; - AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - return result; -} - -LayerTestResult<float, 4> ResizeBilinearMagTest(armnn::IWorkloadFactory& workloadFactory) -{ - constexpr unsigned int inputWidth = 2; - constexpr unsigned int inputHeight = 3; - constexpr unsigned int inputChannels = 1; - constexpr unsigned int inputBatchSize = 1; - - constexpr unsigned int outputWidth = 5; - constexpr unsigned int outputHeight = 3; - constexpr unsigned int outputChannels = inputChannels; - constexpr unsigned int outputBatchSize = inputBatchSize; - - const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, - armnn::DataType::Float32); - const armnn::TensorInfo outputTensorInfo({ outputBatchSize, 
outputChannels, outputHeight, outputWidth }, - armnn::DataType::Float32); - - auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ - 1.0f, 2.0f, - 13.0f, 21.0f, - 144.0f, 233.0f - })); - - LayerTestResult<float, 4> result(outputTensorInfo); - result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({ - 1.0f, 1.4f, 1.8f, 2.f, 2.f, - 13.f, 16.2f, 19.4f, 21.f, 21.f, - 144.f, 179.6f, 215.2f, 233.f, 233.f - })); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::ResizeBilinearQueueDescriptor descriptor; - armnn::WorkloadInfo info; - AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - return result; -} - -LayerTestResult<float, 2> FakeQuantizationTest(armnn::IWorkloadFactory& workloadFactory) -{ - constexpr unsigned int width = 2; - constexpr unsigned int height = 3; - - const armnn::TensorInfo tensorInfo({height, width }, - armnn::DataType::Float32); - auto input = MakeTensor<float, 2>(tensorInfo, std::vector<float>({ - -10.0f, -5.0f, - 0.0f, 5.0f, - 10.0f, 10.0f - })); - - LayerTestResult<float, 2> ret(tensorInfo); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(tensorInfo); - - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(tensorInfo); - - armnn::FakeQuantizationQueueDescriptor data; - armnn::WorkloadInfo info; - - AddInputToWorkload(data, info, tensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, tensorInfo, outputHandle.get()); - float min = -10.f; - float max = 10.f; - - data.m_Parameters.m_Min = min; - data.m_Parameters.m_Max = max; - - armnn::PassthroughCpuTensorHandle refHandle(tensorInfo, &ret.outputExpected[0][0]); - armnn::FakeQuantizationQueueDescriptor refData = data; - armnn::WorkloadInfo refInfo = info; - SetWorkloadOutput(refData, refInfo, 0, tensorInfo, &refHandle); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateFakeQuantization(data, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); - - ret.outputExpected = MakeTensor<float, 2>(tensorInfo, std::vector<float>({ - 0.0f, 63.0f, - 128.0f, 191.0f, - 255.0f, 255.0f - })); - return ret; -} - -LayerTestResult<float, 4> L2Normalization1dTest(armnn::IWorkloadFactory& workloadFactory) -{ - constexpr unsigned int inputWidth = 1; - constexpr unsigned int inputHeight = 1; - constexpr unsigned int inputChannels = 10; - constexpr unsigned int inputBatchSize = 1; - - constexpr unsigned int outputWidth = inputWidth; - constexpr unsigned int outputHeight = inputHeight; - constexpr unsigned int outputChannels = inputChannels; - constexpr unsigned int outputBatchSize = inputBatchSize; - - const armnn::TensorInfo inputTensorInfo({ inputBatchSize, 
inputChannels, inputHeight, inputWidth }, - armnn::DataType::Float32); - const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, - armnn::DataType::Float32); - - auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ - 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f - })); - - const float approxInvL2Norm = 0.050964719f; - LayerTestResult<float, 4> result(outputTensorInfo); - result.outputExpected = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ - 1.0f * approxInvL2Norm, - 2.0f * approxInvL2Norm, - 3.0f * approxInvL2Norm, - 4.0f * approxInvL2Norm, - 5.0f * approxInvL2Norm, - 6.0f * approxInvL2Norm, - 7.0f * approxInvL2Norm, - 8.0f * approxInvL2Norm, - 9.0f * approxInvL2Norm, - 10.0f * approxInvL2Norm - })); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::L2NormalizationQueueDescriptor descriptor; - armnn::WorkloadInfo info; - AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - return result; -} - -namespace -{ - -float CalcInvL2Norm(std::initializer_list<float> elements) -{ - const float reduction = std::accumulate(elements.begin(), elements.end(), 0.0f, - [](float acc, float element) { return acc + element * element; }); - return 1.0f / sqrtf(reduction); -} - -} - -LayerTestResult<float, 4> L2Normalization2dTest(armnn::IWorkloadFactory& workloadFactory) -{ - constexpr unsigned int inputWidth = 5; - constexpr unsigned int inputHeight = 1; - constexpr unsigned int inputChannels = 2; - constexpr unsigned int inputBatchSize = 1; - - constexpr unsigned int outputWidth = inputWidth; - constexpr unsigned int outputHeight = inputHeight; - constexpr unsigned int outputChannels = inputChannels; - constexpr unsigned int outputBatchSize = inputBatchSize; - - const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, - armnn::DataType::Float32); - const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, - armnn::DataType::Float32); - - auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ - 1.0f, 3.0f, 5.0f, 7.0f, 9.0f, - 2.0f, 4.0f, 6.0f, 8.0f, 10.0f - })); - - LayerTestResult<float, 4> result(outputTensorInfo); - result.outputExpected = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ - 1.0f * CalcInvL2Norm({ 1.0f, 2.0f }), - 3.0f * CalcInvL2Norm({ 3.0f, 4.0f }), - 5.0f * CalcInvL2Norm({ 5.0f, 6.0f }), - 7.0f * CalcInvL2Norm({ 7.0f, 8.0f }), - 9.0f * CalcInvL2Norm({ 9.0f, 10.0f }), - - 2.0f * CalcInvL2Norm({ 1.0f, 2.0f }), - 4.0f * CalcInvL2Norm({ 3.0f, 4.0f }), - 6.0f * CalcInvL2Norm({ 5.0f, 6.0f }), - 8.0f * CalcInvL2Norm({ 7.0f, 8.0f }), - 10.0f * CalcInvL2Norm({ 9.0f, 10.0f }) - })); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = 
workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::L2NormalizationQueueDescriptor descriptor; - armnn::WorkloadInfo info; - AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - return result; -} - -LayerTestResult<float, 4> L2Normalization3dTest(armnn::IWorkloadFactory& workloadFactory) -{ - constexpr unsigned int inputWidth = 3; - constexpr unsigned int inputHeight = 4; - constexpr unsigned int inputChannels = 2; - constexpr unsigned int inputBatchSize = 1; - - constexpr unsigned int outputWidth = inputWidth; - constexpr unsigned int outputHeight = inputHeight; - constexpr unsigned int outputChannels = inputChannels; - constexpr unsigned int outputBatchSize = inputBatchSize; - - const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, - armnn::DataType::Float32); - const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, - armnn::DataType::Float32); - - auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ - // Channel 0 - 119.0f, 21.0f, 150.0f, - 149.0f, 32.0f, 179.0f, - 15.0f, 227.0f, 141.0f, - 147.0f, 199.0f, 220.0f, - - // Channel 1 - 110.0f, 140.0f, 73.0f, - 211.0f, 212.0f, 89.0f, - 24.0f, 138.0f, 188.0f, - 162.0f, 12.0f, 161.0f, - })); - - LayerTestResult<float, 4> result(outputTensorInfo); - result.outputExpected = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ - 119.0f * CalcInvL2Norm({ 119.0f, 110.0f }), - 21.0f * CalcInvL2Norm({ 21.0f, 140.0f }), - 150.0f * CalcInvL2Norm({ 150.0f, 73.0f }), - 149.0f * CalcInvL2Norm({ 149.0f, 211.0f }), - 32.0f * CalcInvL2Norm({ 32.0f, 212.0f }), - 179.0f * CalcInvL2Norm({ 179.0f, 89.0f }), - 15.0f * CalcInvL2Norm({ 15.0f, 24.0f }), - 227.0f * CalcInvL2Norm({ 227.0f, 138.0f }), - 141.0f * CalcInvL2Norm({ 141.0f, 188.0f }), - 147.0f * CalcInvL2Norm({ 147.0f, 162.0f }), - 199.0f * CalcInvL2Norm({ 199.0f, 12.0f }), - 220.0f * CalcInvL2Norm({ 220.0f, 161.0f }), - - 110.0f * CalcInvL2Norm({ 119.0f, 110.0f }), - 140.0f * CalcInvL2Norm({ 21.0f, 140.0f }), - 73.0f * CalcInvL2Norm({ 150.0f, 73.0f }), - 211.0f * CalcInvL2Norm({ 149.0f, 211.0f }), - 212.0f * CalcInvL2Norm({ 32.0f, 212.0f }), - 89.0f * CalcInvL2Norm({ 179.0f, 89.0f }), - 24.0f * CalcInvL2Norm({ 15.0f, 24.0f }), - 138.0f * CalcInvL2Norm({ 227.0f, 138.0f }), - 188.0f * CalcInvL2Norm({ 141.0f, 188.0f }), - 162.0f * CalcInvL2Norm({ 147.0f, 162.0f }), - 12.0f * CalcInvL2Norm({ 199.0f, 12.0f }), - 161.0f * CalcInvL2Norm({ 220.0f, 161.0f }), - })); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::L2NormalizationQueueDescriptor descriptor; - armnn::WorkloadInfo info; - AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info); - - 
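// Editorial note (not part of the deleted file): as the expected values above show, these
// L2Normalization tests normalise across the channel dimension, i.e. for every (batch, y, x)
// position out[b][c][y][x] = in[b][c][y][x] / sqrt(sum over all channels k of in[b][k][y][x]^2),
// which is exactly what CalcInvL2Norm computes. For example, L2Normalization1dTest above (ten
// channels at a single spatial position) uses 1 / sqrt(1^2 + 2^2 + ... + 10^2) = 1 / sqrt(385)
// ~= 0.050964719f, and the first element of this 3d test is 119.0f / sqrt(119.0f^2 + 110.0f^2).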
inputHandle->Allocate(); - outputHandle->Allocate(); - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - return result; -} - -LayerTestResult<float, 4> L2Normalization4dTest(armnn::IWorkloadFactory& workloadFactory) -{ - constexpr unsigned int inputWidth = 3; - constexpr unsigned int inputHeight = 4; - constexpr unsigned int inputChannels = 3; - constexpr unsigned int inputBatchSize = 2; - - constexpr unsigned int outputWidth = inputWidth; - constexpr unsigned int outputHeight = inputHeight; - constexpr unsigned int outputChannels = inputChannels; - constexpr unsigned int outputBatchSize = inputBatchSize; - - const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, - armnn::DataType::Float32); - const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, - armnn::DataType::Float32); - - auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ - // Batch 0, Channel 0 - 235.0f, 46.0f, 178.0f, - 100.0f, 123.0f, 19.0f, - 172.0f, 74.0f, 250.0f, - 6.0f, 195.0f, 80.0f, - - // Batch 0, Channel 1 - 113.0f, 95.0f, 202.0f, - 77.0f, 114.0f, 71.0f, - 122.0f, 246.0f, 166.0f, - 82.0f, 28.0f, 37.0f, - - // Batch 0, Channel 2 - 56.0f, 170.0f, 162.0f, - 194.0f, 89.0f, 254.0f, - 12.0f, 209.0f, 200.0f, - 1.0f, 64.0f, 54.0f, - - // Batch 1, Channel 0 - 67.0f, 90.0f, 49.0f, - 7.0f, 163.0f, 18.0f, - 25.0f, 117.0f, 103.0f, - 247.0f, 59.0f, 189.0f, - - // Batch 1, Channel 1 - 239.0f, 104.0f, 199.0f, - 17.0f, 124.0f, 153.0f, - 222.0f, 217.0f, 75.0f, - 32.0f, 126.0f, 21.0f, - - // Batch 1, Channel 2 - 97.0f, 145.0f, 215.0f, - 115.0f, 116.0f, 238.0f, - 226.0f, 16.0f, 132.0f, - 92.0f, 125.0f, 88.0f, - })); - - LayerTestResult<float, 4> result(outputTensorInfo); - result.outputExpected = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ - - // Batch 0, Channel 0 - 235.0f * CalcInvL2Norm({ 235.0f, 113.0f, 56.0f }), - 46.0f * CalcInvL2Norm({ 46.0f, 95.0f, 170.0f }), - 178.0f * CalcInvL2Norm({ 178.0f, 202.0F, 162.0f }), - 100.0f * CalcInvL2Norm({ 100.0f, 77.0f, 194.0f }), - 123.0f * CalcInvL2Norm({ 123.0f, 114.0f, 89.0f }), - 19.0f * CalcInvL2Norm({ 19.0f, 71.0f, 254.0f }), - 172.0f * CalcInvL2Norm({ 172.0f, 122.0f, 12.0f }), - 74.0f * CalcInvL2Norm({ 74.0f, 246.0f, 209.0f }), - 250.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }), - 6.0f * CalcInvL2Norm({ 6.0f, 82.0f, 1.0f }), - 195.0f * CalcInvL2Norm({ 195.0f, 28.0f, 64.0f }), - 80.0f * CalcInvL2Norm({ 80.0f, 37.0f, 54.0f }), - - // Batch 0, Channel 1 - 113.0f * CalcInvL2Norm({ 235.0f, 113.0f, 56.0f }), - 95.0f * CalcInvL2Norm({ 46.0f, 95.0f, 170.0f }), - 202.0f * CalcInvL2Norm({ 178.0f, 202.0F, 162.0f }), - 77.0f * CalcInvL2Norm({ 100.0f, 77.0f, 194.0f }), - 114.0f * CalcInvL2Norm({ 123.0f, 114.0f, 89.0f }), - 71.0f * CalcInvL2Norm({ 19.0f, 71.0f, 254.0f }), - 122.0f * CalcInvL2Norm({ 172.0f, 122.0f, 12.0f }), - 246.0f * CalcInvL2Norm({ 74.0f, 246.0f, 209.0f }), - 166.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }), - 82.0f * CalcInvL2Norm({ 6.0f, 82.0f, 1.0f }), - 28.0f * CalcInvL2Norm({ 195.0f, 28.0f, 64.0f }), - 37.0f * CalcInvL2Norm({ 80.0f, 37.0f, 54.0f }), - - // Batch 0, Channel 2 - 56.0f * CalcInvL2Norm({ 235.0f, 113.0f, 56.0f }), - 170.0f * CalcInvL2Norm({ 46.0f, 95.0f, 170.0f }), - 162.0f * CalcInvL2Norm({ 178.0f, 202.0F, 162.0f }), - 194.0f * CalcInvL2Norm({ 100.0f, 77.0f, 194.0f }), - 89.0f * 
CalcInvL2Norm({ 123.0f, 114.0f, 89.0f }), - 254.0f * CalcInvL2Norm({ 19.0f, 71.0f, 254.0f }), - 12.0f * CalcInvL2Norm({ 172.0f, 122.0f, 12.0f }), - 209.0f * CalcInvL2Norm({ 74.0f, 246.0f, 209.0f }), - 200.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }), - 1.0f * CalcInvL2Norm({ 6.0f, 82.0f, 1.0f }), - 64.0f * CalcInvL2Norm({ 195.0f, 28.0f, 64.0f }), - 54.0f * CalcInvL2Norm({ 80.0f, 37.0f, 54.0f }), - - // Batch 1, Channel 0 - 67.0f * CalcInvL2Norm({ 67.0f, 239.0f, 97.0f }), - 90.0f * CalcInvL2Norm({ 90.0f, 104.0f, 145.0f }), - 49.0f * CalcInvL2Norm({ 49.0f, 199.0f, 215.0f }), - 7.0f * CalcInvL2Norm({ 7.0f, 17.0f, 115.0f }), - 163.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }), - 18.0f * CalcInvL2Norm({ 18.0f, 153.0f, 238.0f }), - 25.0f * CalcInvL2Norm({ 25.0f, 222.0f, 226.0f }), - 117.0f * CalcInvL2Norm({ 117.0f, 217.0f, 16.0f }), - 103.0f * CalcInvL2Norm({ 103.0f, 75.0f, 132.0f }), - 247.0f * CalcInvL2Norm({ 247.0f, 32.0f, 92.0f }), - 59.0f * CalcInvL2Norm({ 59.0f, 126.0f, 125.0f }), - 189.0f * CalcInvL2Norm({ 189.0f, 21.0f, 88.0f }), - - // Batch 1, Channel 1 - 239.0f * CalcInvL2Norm({ 67.0f, 239.0f, 97.0f }), - 104.0f * CalcInvL2Norm({ 90.0f, 104.0f, 145.0f }), - 199.0f * CalcInvL2Norm({ 49.0f, 199.0f, 215.0f }), - 17.0f * CalcInvL2Norm({ 7.0f, 17.0f, 115.0f }), - 124.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }), - 153.0f * CalcInvL2Norm({ 18.0f, 153.0f, 238.0f }), - 222.0f * CalcInvL2Norm({ 25.0f, 222.0f, 226.0f }), - 217.0f * CalcInvL2Norm({ 117.0f, 217.0f, 16.0f }), - 75.0f * CalcInvL2Norm({ 103.0f, 75.0f, 132.0f }), - 32.0f * CalcInvL2Norm({ 247.0f, 32.0f, 92.0f }), - 126.0f * CalcInvL2Norm({ 59.0f, 126.0f, 125.0f }), - 21.0f * CalcInvL2Norm({ 189.0f, 21.0f, 88.0f }), - - // Batch 1, Channel 2 - 97.0f * CalcInvL2Norm({ 67.0f, 239.0f, 97.0f }), - 145.0f * CalcInvL2Norm({ 90.0f, 104.0f, 145.0f }), - 215.0f * CalcInvL2Norm({ 49.0f, 199.0f, 215.0f }), - 115.0f * CalcInvL2Norm({ 7.0f, 17.0f, 115.0f }), - 116.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }), - 238.0f * CalcInvL2Norm({ 18.0f, 153.0f, 238.0f }), - 226.0f * CalcInvL2Norm({ 25.0f, 222.0f, 226.0f }), - 16.0f * CalcInvL2Norm({ 117.0f, 217.0f, 16.0f }), - 132.0f * CalcInvL2Norm({ 103.0f, 75.0f, 132.0f }), - 92.0f * CalcInvL2Norm({ 247.0f, 32.0f, 92.0f }), - 125.0f * CalcInvL2Norm({ 59.0f, 126.0f, 125.0f }), - 88.0f * CalcInvL2Norm({ 189.0f, 21.0f, 88.0f }), - })); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::L2NormalizationQueueDescriptor descriptor; - armnn::WorkloadInfo info; - AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - return result; -} - -template <typename T> -LayerTestResult<T, 4> ConstantTestImpl(armnn::IWorkloadFactory& workloadFactory, - float qScale, - int32_t qOffset) -{ - constexpr unsigned int inputWidth = 3; - constexpr unsigned int inputHeight = 4; - constexpr unsigned int inputChannels = 3; - constexpr unsigned int inputBatchSize = 2; - - constexpr unsigned int 
outputWidth = inputWidth; - constexpr unsigned int outputHeight = inputHeight; - constexpr unsigned int outputChannels = inputChannels; - constexpr unsigned int outputBatchSize = inputBatchSize; - - armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, - armnn::GetDataType<T>()); - - armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, - armnn::GetDataType<T>()); - - // Set quantization parameters if the requested type is a quantized type. - if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - } - - auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>( - QuantizedVector<T>(qScale, qOffset, { - // Batch 0, Channel 0 - 235.0f, 46.0f, 178.0f, - 100.0f, 123.0f, 19.0f, - 172.0f, 74.0f, 250.0f, - 6.0f, 195.0f, 80.0f, - - // Batch 0, Channel 1 - 113.0f, 95.0f, 202.0f, - 77.0f, 114.0f, 71.0f, - 122.0f, 246.0f, 166.0f, - 82.0f, 28.0f, 37.0f, - - // Batch 0, Channel 2 - 56.0f, 170.0f, 162.0f, - 194.0f, 89.0f, 254.0f, - 12.0f, 209.0f, 200.0f, - 1.0f, 64.0f, 54.0f, - - // Batch 1, Channel 0 - 67.0f, 90.0f, 49.0f, - 7.0f, 163.0f, 18.0f, - 25.0f, 117.0f, 103.0f, - 247.0f, 59.0f, 189.0f, - - // Batch 1, Channel 1 - 239.0f, 104.0f, 199.0f, - 17.0f, 124.0f, 153.0f, - 222.0f, 217.0f, 75.0f, - 32.0f, 126.0f, 21.0f, - - // Batch 1, Channel 2 - 97.0f, 145.0f, 215.0f, - 115.0f, 116.0f, 238.0f, - 226.0f, 16.0f, 132.0f, - 92.0f, 125.0f, 88.0f, - }))); - - LayerTestResult<T, 4> result(outputTensorInfo); - result.outputExpected = input; - - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::ScopedCpuTensorHandle constantTensor(inputTensorInfo); - AllocateAndCopyDataToITensorHandle(&constantTensor, &input[0][0][0][0]); - - armnn::ConstantQueueDescriptor descriptor; - descriptor.m_LayerOutput = &constantTensor; - - armnn::WorkloadInfo info; - AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConstant(descriptor, info); - - outputHandle->Allocate(); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - return result; -} - -LayerTestResult<float, 4> ConstantTest(armnn::IWorkloadFactory& workloadFactory) -{ - return ConstantTestImpl<float>(workloadFactory, 0.0f, 0); -} - -LayerTestResult<uint8_t, 4> ConstantTestUint8(armnn::IWorkloadFactory& workloadFactory) -{ - return ConstantTestImpl<uint8_t>(workloadFactory, 1.0f, 0); -} - -LayerTestResult<uint8_t, 3> MergerUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - unsigned int outputWidth = 3; - unsigned int outputHeight = 6; - unsigned int outputChannels = 3; - - unsigned int inputWidth1 = 3; - unsigned int inputHeight1 = 6; - unsigned int inputChannels1 = 2; - - unsigned int inputWidth2 = 3; - unsigned int inputHeight2 = 6; - unsigned int inputChannels2 = 1; - - // Defines the tensor descriptors. 
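// Editorial note (not part of the deleted file): this merger test stacks its two inputs along the
// channel axis of the 3-channel output. The view origins set up further down, { 0, 0, 0 } and
// { 2, 0, 0 }, place input1's two channels at output channels 0-1 and input2's single channel at
// output channel 2. Because both inputs reuse the output's quantisation scale and offset, the
// merger copies the raw uint8 values through without requantising, which is why the expected
// output is simply the two inputs laid end to end.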
- armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::QuantisedAsymm8); - armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::QuantisedAsymm8); - armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::QuantisedAsymm8); - - // Arbitrary scale and offsets. They don't really matter as the merger operator doesn't dequantize/quantize them. - const float scale = 0.13497836f; - const int32_t offset = -7; - - outputTensorInfo.SetQuantizationScale(scale); - outputTensorInfo.SetQuantizationOffset(offset); - inputTensorInfo1.SetQuantizationScale(scale); - inputTensorInfo1.SetQuantizationOffset(offset); - inputTensorInfo2.SetQuantizationScale(scale); - inputTensorInfo2.SetQuantizationOffset(offset); - - LayerTestResult<uint8_t, 3> ret(outputTensorInfo); - - ret.outputExpected = MakeTensor<uint8_t, 3>(outputTensorInfo, std::vector<uint8_t>( - { - 1, 2, 3, - 4, 5, 6, - 7, 8, 9, - 10, 11, 12, - 13, 14, 15, - 16, 17, 18, - - 19, 20, 21, - 22, 23, 24, - 25, 26, 27, - 28, 29, 30, - 31, 32, 33, - 34, 35, 36, - - 37, 38, 39, - 40, 41, 42, - 43, 44, 45, - 46, 47, 48, - 49, 50, 51, - 52, 53, 54, - }) - ); - - auto input1 = MakeTensor<uint8_t, 3>(inputTensorInfo1, std::vector<uint8_t>( - { - 1, 2, 3, - 4, 5, 6, - 7, 8, 9, - 10, 11, 12, - 13, 14, 15, - 16, 17, 18, - - 19, 20, 21, - 22, 23, 24, - 25, 26, 27, - 28, 29, 30, - 31, 32, 33, - 34, 35, 36, - }) - ); - - auto input2 = MakeTensor<uint8_t, 3>(inputTensorInfo2, std::vector<uint8_t>( - { - 37, 38, 39, - 40, 41, 42, - 43, 44, 45, - 46, 47, 48, - 49, 50, 51, - 52, 53, 54, - }) - ); - - std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; //Extent of the window is defined by size of input[0]. - armnn::MergerQueueDescriptor::ViewOrigin window1(wOrigin1); - - std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; //Extent of the window is defined by size of input[1]. - armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2); - - - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - bool subTensorsSupported = workloadFactory.SupportsSubTensors(); - - std::unique_ptr<armnn::ITensorHandle> inputHandle1 = - subTensorsSupported ? - workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) : - workloadFactory.CreateTensorHandle(inputTensorInfo1); - - std::unique_ptr<armnn::ITensorHandle> inputHandle2 = - subTensorsSupported ? 
- workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) : - workloadFactory.CreateTensorHandle(inputTensorInfo2); - - - armnn::MergerQueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); - AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - data.m_ViewOrigins.push_back(window1); - data.m_ViewOrigins.push_back(window2); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(data, info); - - inputHandle1->Allocate(); - inputHandle2->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]); - CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get()); - - return ret; -} - -LayerTestResult<uint8_t, 4> AdditionUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - unsigned int batchSize = 1; - unsigned int channels = 2; - unsigned int height = 2; - unsigned int width = 3; - - const float scale = 7.0f; - const int32_t offset = 3; - - armnn::TensorInfo inputTensorInfo1, inputTensorInfo2; - armnn::TensorInfo outputTensorInfo; - - const unsigned int shape[] = { batchSize, channels, height, width }; - inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8); - inputTensorInfo1.SetQuantizationScale(scale); - inputTensorInfo1.SetQuantizationOffset(offset); - - inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8); - inputTensorInfo2.SetQuantizationScale(scale); - inputTensorInfo2.SetQuantizationOffset(offset); - - outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8); - outputTensorInfo.SetQuantizationScale(scale); - outputTensorInfo.SetQuantizationOffset(offset); - - // See dequantized values to the right. - auto input1 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>( - { - 63, 35, 77, 70, 56, 112, // 420, 224, 518, 469, 371, 763 - 203, 28, 252, 168, 245, 91 // 1400, 175, 1743, 1155, 1694, 616 - })); - - // See dequantized values to the right. - auto input2 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>( - { - 21, 7, 175, 231, 175, 210, // 126, 28, 1204, 1596, 1204, 1449 - 126, 161, 63, 21, 105, 126 // 861, 1106, 420, 126, 714, 861 - })); - - // See dequantized values to the right. 
- LayerTestResult<uint8_t, 4> result(outputTensorInfo); - result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>( - { - 81, 39, 249, 255, 228, 255, // 546, 252, 1722, 2065(clamped), 1575, 2212(clamped) - 255, 186, 255, 186, 255, 214, // 2261(clamped), 1281, 2163(clamped), 1281, 2408(clamped), 1477 - })); - - std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); - std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::AdditionQueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); - AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info); - - inputHandle1->Allocate(); - inputHandle2->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); - CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - - return result; -} - -namespace -{ -LayerTestResult<uint8_t, 4> MultiplicationUint8TestHelper(armnn::IWorkloadFactory& workloadFactory, - const unsigned int shape0[4], - const std::vector<uint8_t> & values0, - float scale0, - int32_t offset0, - const unsigned int shape1[4], - const std::vector<uint8_t> & values1, - float scale1, - int32_t offset1, - const unsigned int outShape[4], - const std::vector<uint8_t> & outValues, - float outScale, - int32_t outOffset) -{ - armnn::TensorInfo inputTensorInfo0(4, shape0, armnn::DataType::QuantisedAsymm8); - armnn::TensorInfo inputTensorInfo1(4, shape1, armnn::DataType::QuantisedAsymm8); - armnn::TensorInfo outputTensorInfo(4, outShape, armnn::DataType::QuantisedAsymm8); - - inputTensorInfo0.SetQuantizationScale(scale0); - inputTensorInfo0.SetQuantizationOffset(offset0); - - inputTensorInfo1.SetQuantizationScale(scale1); - inputTensorInfo1.SetQuantizationOffset(offset1); - - outputTensorInfo.SetQuantizationScale(outScale); - outputTensorInfo.SetQuantizationOffset(outOffset); - - auto input0 = MakeTensor<uint8_t, 4>(inputTensorInfo0, values0); - auto input1 = MakeTensor<uint8_t, 4>(inputTensorInfo1, values1); - - LayerTestResult<uint8_t, 4> result(outputTensorInfo); - result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, outValues); - - std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0); - std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::MultiplicationQueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get()); - AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info); - - inputHandle0->Allocate(); - inputHandle1->Allocate(); - outputHandle->Allocate(); - - 
CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]); - CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - - return result; -} -} // anonymous namespace - -LayerTestResult<uint8_t, 4> MultiplicationUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - unsigned int batchSize = 1; - unsigned int channels = 2; - unsigned int height = 2; - unsigned int width = 3; - const unsigned int shape[] = { batchSize, channels, height, width }; - - // See dequantized values to the right. - std::vector<uint8_t> input0({ - 62, 37, 3, 172, 13, 111, // 244, 144, 8, 684, 48, 440, - 188, 20, 73, 31, 23, 31 // 748, 76, 288, 120, 88, 120 - }); - - // See dequantized values to the right. - std::vector<uint8_t> input1({ - 126, 240, 252, 183, 121, 247, // 384, 726, 762, 555, 369, 747, - 48, 115, 151, 79, 78, 97 // 150, 351, 459, 243, 240, 297 - }); - - // See dequantized values to the right. - std::vector<uint8_t> output( - { - 64, 72, 0, 255, 8, 236, // 93696, 104544, 6096(clamped), 379620(clamped), 17712, 328680, - 77, 15, 92, 16, 10, 21, // 112200, 26676, 132192, 29160, 21120, 35640 - }); - - return MultiplicationUint8TestHelper(workloadFactory, - shape, - input0, - 4.0f, - 1, - shape, - input1, - 3.0f, - -2, - shape, - output, - 1366.255f, // Scale/offset chosen to have output values out of range. - -5); -} - -LayerTestResult<uint8_t, 4> MultiplicationBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - const unsigned int shape0[] = { 1, 2, 2, 3 }; - const unsigned int shape1[] = { 1, 1, 1, 1 }; - - std::vector<uint8_t> input0({ - 1, 2, 3, 4, 5, 6, - 7, 8, 9, 10, 11, 12 - }); - - std::vector<uint8_t> input1({2}); - - std::vector<uint8_t> output({ - 2, 4, 6, 8, 10, 12, - 14, 16, 18, 20, 22, 24 - }); - - return MultiplicationUint8TestHelper(workloadFactory, - shape0, - input0, - 1.0f, - 0, - shape1, - input1, - 1.0f, - 0, - shape0, - output, - 1.0f, - 0); -} - -LayerTestResult<uint8_t, 4> MultiplicationBroadcast1DVectorUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - const unsigned int shape0[] = { 1, 2, 2, 3 }; - const unsigned int shape1[] = { 1, 1, 1, 3 }; - - std::vector<uint8_t> input0({ - 1, 2, 3, 4, 5, 6, - 7, 8, 9, 10, 11, 12 - }); - - std::vector<uint8_t> input1({1, 2, 3}); - - std::vector<uint8_t> output({ - 1, 4, 9, 4, 10, 18, - 7, 16, 27, 10, 22, 36 - }); - - return MultiplicationUint8TestHelper(workloadFactory, - shape0, - input0, - 1.0f, - 0, - shape1, - input1, - 1.0f, - 0, - shape0, - output, - 1.0f, - 0); -} - -namespace -{ -template <typename T> -LayerTestResult<T, 4> SubtractionTestHelper(armnn::IWorkloadFactory& workloadFactory, - const unsigned int shape0[4], - const std::vector<T>& values0, - float scale0, - int32_t offset0, - const unsigned int shape1[4], - const std::vector<T> & values1, - float scale1, - int32_t offset1, - const unsigned int outShape[4], - const std::vector<T> & outValues, - float outScale, - int32_t outOffset) -{ - auto dataType = (std::is_same<T, uint8_t>::value ? 
- armnn::DataType::QuantisedAsymm8 : - armnn::DataType::Float32); - - armnn::TensorInfo inputTensorInfo0(4, shape0, dataType); - armnn::TensorInfo inputTensorInfo1(4, shape1, dataType); - armnn::TensorInfo outputTensorInfo(4, outShape, dataType); - - inputTensorInfo0.SetQuantizationScale(scale0); - inputTensorInfo0.SetQuantizationOffset(offset0); - - inputTensorInfo1.SetQuantizationScale(scale1); - inputTensorInfo1.SetQuantizationOffset(offset1); - - outputTensorInfo.SetQuantizationScale(outScale); - outputTensorInfo.SetQuantizationOffset(outOffset); - - auto input0 = MakeTensor<T, 4>(inputTensorInfo0, values0); - auto input1 = MakeTensor<T, 4>(inputTensorInfo1, values1); - - LayerTestResult<T, 4> result(outputTensorInfo); - result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outValues); - - std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0); - std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::SubtractionQueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get()); - AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSubtraction(data, info); - - inputHandle0->Allocate(); - inputHandle1->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]); - CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - - return result; -} -} // anonymous namespace - -LayerTestResult<uint8_t, 4> SubtractionUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - const unsigned int shape0[] = { 1, 1, 2, 2 }; - const unsigned int shape1[] = { 1, 1, 2, 2 }; - - std::vector<uint8_t> input0({ 10, 12, 14, 16 }); - std::vector<uint8_t> input1({ 1, 2, 1, 2 }); - std::vector<uint8_t> output({ 3, 3, 5, 5 }); - - return SubtractionTestHelper(workloadFactory, - shape0, input0, 0.5f, 2, - shape1, input1, 1.0f, 0, - shape0, output, 1.0f, 0); -} - -LayerTestResult<uint8_t, 4> SubtractionBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - const unsigned int shape0[] = { 1, 1, 2, 2 }; - const unsigned int shape1[] = { 1, 1, 1, 1 }; - - std::vector<uint8_t> input0({ 10, 12, 14, 16 }); - std::vector<uint8_t> input1({ 2 }); - std::vector<uint8_t> output({ 5, 6, 7, 8 }); - - return SubtractionTestHelper(workloadFactory, - shape0, input0, 0.5f, 2, - shape1, input1, 1.0f, 0, - shape0, output, 1.0f, 3); -} - -LayerTestResult<uint8_t, 4> SubtractionBroadcastUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - const unsigned int shape0[] = { 1, 1, 2, 2 }; - const unsigned int shape1[] = { 1, 1, 2, 1 }; - - std::vector<uint8_t> input0({ 10, 12, 14, 16 }); - std::vector<uint8_t> input1({ 2, 1 }); - std::vector<uint8_t> output({ 8, 11, 12, 15 }); - - return SubtractionTestHelper(workloadFactory, - shape0, input0, 1.0f, 0, - shape1, input1, 1.0f, 0, - shape0, output, 1.0f, 0); -} - -LayerTestResult<float, 4> SubtractionTest(armnn::IWorkloadFactory& workloadFactory) -{ - const unsigned int shape0[] = { 1, 1, 2, 2 }; - const unsigned int shape1[] = { 1, 1, 2, 2 }; - - 
std::vector<float> input0({ 1, 2, 3, 4 }); - std::vector<float> input1({ 1, -1, 0, 2 }); - std::vector<float> output({ 0, 3, 3, 2 }); - - return SubtractionTestHelper(workloadFactory, - shape0, input0, 1.0f, 0, - shape1, input1, 1.0f, 0, - shape0, output, 1.0f, 0); -} - -LayerTestResult<float, 4> SubtractionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory) -{ - const unsigned int shape0[] = { 1, 1, 2, 2 }; - const unsigned int shape1[] = { 1, 1, 1, 1 }; - - std::vector<float> input0({ 1, 2, 3, 4 }); - std::vector<float> input1({ 10 }); - std::vector<float> output({ -9, -8, -7, -6 }); - - return SubtractionTestHelper(workloadFactory, - shape0, input0, 1.0f, 0, - shape1, input1, 1.0f, 0, - shape0, output, 1.0f, 0); -} - -LayerTestResult<float, 4> SubtractionBroadcastTest(armnn::IWorkloadFactory& workloadFactory) -{ - const unsigned int shape0[] = { 1, 1, 2, 2 }; - const unsigned int shape1[] = { 1, 1, 1, 2 }; - - std::vector<float> input0({ 1, 2, 3, 4 }); - std::vector<float> input1({ 10, -5 }); - std::vector<float> output({ -9, 7, -7, 9 }); - - return SubtractionTestHelper(workloadFactory, - shape0, input0, 1.0f, 0, - shape1, input1, 1.0f, 0, - shape0, output, 1.0f, 0); -} - -LayerTestResult<uint8_t, 4> ResizeBilinearNopUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - constexpr unsigned int inputWidth = 4; - constexpr unsigned int inputHeight = 4; - constexpr unsigned int inputChannels = 1; - constexpr unsigned int inputBatchSize = 1; - - constexpr unsigned int outputWidth = inputWidth; - constexpr unsigned int outputHeight = inputHeight; - constexpr unsigned int outputChannels = inputChannels; - constexpr unsigned int outputBatchSize = inputBatchSize; - - armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, - armnn::DataType::QuantisedAsymm8); - inputTensorInfo.SetQuantizationScale(1.5f); - inputTensorInfo.SetQuantizationOffset(-3); - - armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, - armnn::DataType::QuantisedAsymm8); - outputTensorInfo.SetQuantizationScale(1.5f); - outputTensorInfo.SetQuantizationOffset(-3); - - auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({ - 1, 2, 3, 4, - 2, 3, 4, 5, - 3, 4, 5, 6, - 4, 5, 6, 7 - })); - - LayerTestResult<uint8_t, 4> result(outputTensorInfo); - result.outputExpected = input; - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::ResizeBilinearQueueDescriptor descriptor; - armnn::WorkloadInfo info; - AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - return result; -} - -LayerTestResult<uint8_t, 4> SimpleResizeBilinearUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - constexpr unsigned int inputWidth = 2; - constexpr unsigned int inputHeight = 2; - constexpr unsigned int inputChannels = 1; - constexpr unsigned int inputBatchSize = 1; - - constexpr unsigned int outputWidth = 
inputWidth / 2; - constexpr unsigned int outputHeight = inputHeight / 2; - constexpr unsigned int outputChannels = inputChannels; - constexpr unsigned int outputBatchSize = inputBatchSize; - - armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, - armnn::DataType::QuantisedAsymm8); - inputTensorInfo.SetQuantizationScale(0.1567f); - inputTensorInfo.SetQuantizationOffset(1); - - armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, - armnn::DataType::QuantisedAsymm8); - outputTensorInfo.SetQuantizationScale(0.1567f); - outputTensorInfo.SetQuantizationOffset(1); - - auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({ - 1, 255, - 200, 250 - })); - - // The 'resize bilinear' operation projects the top-left corner of output texels into the input image, - // then figures out the interpolants and weights. Note this is different to projecting the centre of the - // output texel - and thus we'll expect the output 1x1 matrix to contain, as its single element, the value - // that was at position (0,0) of the input matrix (rather than an average, which we would expect if projecting - // the centre). - LayerTestResult<uint8_t, 4> result(outputTensorInfo); - result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({ - 1 - })); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::ResizeBilinearQueueDescriptor descriptor; - armnn::WorkloadInfo info; - AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - return result; -} - -LayerTestResult<uint8_t, 4> ResizeBilinearSqMinUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - constexpr unsigned int inputWidth = 4; - constexpr unsigned int inputHeight = 4; - constexpr unsigned int inputChannels = 1; - constexpr unsigned int inputBatchSize = 1; - - constexpr unsigned int outputWidth = inputWidth / 2; - constexpr unsigned int outputHeight = inputHeight / 2; - constexpr unsigned int outputChannels = inputChannels; - constexpr unsigned int outputBatchSize = inputBatchSize; - - armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, - armnn::DataType::QuantisedAsymm8); - inputTensorInfo.SetQuantizationScale(3.141592f); - inputTensorInfo.SetQuantizationOffset(3); - - armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, - armnn::DataType::QuantisedAsymm8); - outputTensorInfo.SetQuantizationScale(3.141592f); - outputTensorInfo.SetQuantizationOffset(3); - - auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({ - 1, 2, 3, 4, - 2, 3, 4, 5, - 3, 4, 5, 6, - 4, 5, 6, 7 - })); - - LayerTestResult<uint8_t, 4> result(outputTensorInfo); - result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({ - 1, 3, - 3, 5 - })); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = 
workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::ResizeBilinearQueueDescriptor descriptor; - armnn::WorkloadInfo info; - AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - return result; -} - -LayerTestResult<uint8_t, 4> ResizeBilinearMinUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - constexpr unsigned int inputWidth = 3; - constexpr unsigned int inputHeight = 2; - constexpr unsigned int inputChannels = 1; - constexpr unsigned int inputBatchSize = 1; - - constexpr unsigned int outputWidth = 2; - constexpr unsigned int outputHeight = 1; - constexpr unsigned int outputChannels = inputChannels; - constexpr unsigned int outputBatchSize = inputBatchSize; - - armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, - armnn::DataType::QuantisedAsymm8); - inputTensorInfo.SetQuantizationScale(1.5f); - inputTensorInfo.SetQuantizationOffset(-1); - - armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, - armnn::DataType::QuantisedAsymm8); - outputTensorInfo.SetQuantizationScale(1.5f); - outputTensorInfo.SetQuantizationOffset(-1); - - auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({ - 1, 2, 3, // 3.0, 4.5, 6.0 - 5, 8, 13 // 9.0, 13.5, 21.0 - })); - - LayerTestResult<uint8_t, 4> result(outputTensorInfo); - result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({ - 1, 3 // 3.0, 5.25 - })); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::ResizeBilinearQueueDescriptor descriptor; - armnn::WorkloadInfo info; - AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - return result; -} - -LayerTestResult<uint8_t, 4> ResizeBilinearMagUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - constexpr unsigned int inputWidth = 2; - constexpr unsigned int inputHeight = 3; - constexpr unsigned int inputChannels = 1; - constexpr unsigned int inputBatchSize = 1; - - constexpr unsigned int outputWidth = 5; - constexpr unsigned int outputHeight = 3; - constexpr unsigned int outputChannels = inputChannels; - constexpr unsigned int outputBatchSize = inputBatchSize; - - armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, - armnn::DataType::QuantisedAsymm8); - inputTensorInfo.SetQuantizationScale(0.010765f); - 
inputTensorInfo.SetQuantizationOffset(7); - - armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, - armnn::DataType::QuantisedAsymm8); - outputTensorInfo.SetQuantizationScale(0.010132f); - outputTensorInfo.SetQuantizationOffset(-18); - - auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({ - 24, 228, // 0.183005, 2.379065, - 105, 128, // 1.05497, 1.302565 - 230, 71 // 2.400595, 0.68896 - })); - - LayerTestResult<uint8_t, 4> result(outputTensorInfo); - result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({ - 0, 87, 173, 217, 217, // 0.18300501, 1.06142902, 1.93985295, 2.37906504, 2.37906504 - 86, 96, 106, 111, 111, // 1.05497003, 1.15400803, 1.25304604, 1.30256498, 1.30256498 - 219, 151, 84, 50, 50 // 2.40059495, 1.71594095, 1.03128707, 0.68896002, 0.68896002 - })); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::ResizeBilinearQueueDescriptor descriptor; - armnn::WorkloadInfo info; - AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - return result; -} - -LayerTestResult<float, 4> BatchNormTest(armnn::IWorkloadFactory& workloadFactory) -{ - auto ret = BatchNormTestImpl<float>(workloadFactory, 0.f, 0); - return ret; -} - -LayerTestResult<uint8_t, 4> BatchNormUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - auto ret = BatchNormTestImpl<uint8_t>(workloadFactory, 1.f/20.f, 50); - return ret; -} - -LayerTestResult<uint8_t, 4> ConstantUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return ConstantTestImpl<uint8_t>(workloadFactory, 2e-6f, 1); -} - -LayerTestResult<uint8_t, 1> Concatenation1dUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation1dTestImpl<uint8_t>(workloadFactory, 0.5f, -1); -} - -LayerTestResult<uint8_t, 2> Concatenation2dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation2dDim0TestImpl<uint8_t>(workloadFactory, 0.5f, -1); -} - -LayerTestResult<uint8_t, 2> Concatenation2dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation2dDim1TestImpl<uint8_t>(workloadFactory, 0.5f, -1); -} - -LayerTestResult<uint8_t, 2> Concatenation2dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation2dDim0DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1); -} - -LayerTestResult<uint8_t, 2> Concatenation2dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation2dDim1DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1); -} - -LayerTestResult<uint8_t, 3> Concatenation3dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation3dDim0TestImpl<uint8_t>(workloadFactory, 0.5f, -1); -} - -LayerTestResult<uint8_t, 3> Concatenation3dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation3dDim1TestImpl<uint8_t>(workloadFactory, 0.5f, -1); -} - 
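The scale/offset pairs fed to these uint8 tests, and the dequantized values quoted in comments such as those in ResizeBilinearMagUint8Test above, follow the asymmetric quantization rule real = scale * (quantized - offset). Below is a minimal standalone sketch (not part of the test suite; the helper name is illustrative only) that reproduces two of the figures quoted for that test's input tensor:

#include <cstdint>
#include <cstdio>

// Dequantize a QAsymm8 value: real = scale * (quantized - offset).
float Dequantize(uint8_t quantized, float scale, int32_t offset)
{
    return scale * static_cast<float>(static_cast<int32_t>(quantized) - offset);
}

int main()
{
    // Input quantization info used by ResizeBilinearMagUint8Test: scale 0.010765f, offset 7.
    std::printf("%f\n", Dequantize(24, 0.010765f, 7));  // prints ~0.183005
    std::printf("%f\n", Dequantize(228, 0.010765f, 7)); // prints ~2.379065
    return 0;
}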
-LayerTestResult<uint8_t, 3> Concatenation3dDim2Uint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation3dDim2TestImpl<uint8_t>(workloadFactory, 0.5f, -1); -} - -LayerTestResult<uint8_t, 3> Concatenation3dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation3dDim0TestImpl<uint8_t>(workloadFactory, 0.5f, -1); -} - -LayerTestResult<uint8_t, 3> Concatenation3dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation3dDim1DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1); -} - -LayerTestResult<uint8_t, 3> Concatenation3dDim2DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return Concatenation3dDim2DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1); -} - -LayerTestResult<float, 4> SimpleMaxPooling2dSize2x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory, - bool forceNoPadding) -{ - return SimpleMaxPooling2dSize2x2Stride2x2TestCommon<float>(workloadFactory, forceNoPadding); -} - -LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize2x2Stride2x2Uint8Test(armnn::IWorkloadFactory& workloadFactory, - bool forceNoPadding) -{ - return SimpleMaxPooling2dSize2x2Stride2x2TestCommon<uint8_t>(workloadFactory, forceNoPadding, 3.0f, -5); -} - -LayerTestResult<float, 4> SimpleMaxPooling2dSize3x3Stride2x4Test(armnn::IWorkloadFactory& workloadFactory, - bool forceNoPadding) -{ - return SimpleMaxPooling2dSize3x3Stride2x4TestCommon<float>(workloadFactory, forceNoPadding); -} - -LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize3x3Stride2x4Uint8Test(armnn::IWorkloadFactory& workloadFactory, - bool forceNoPadding) -{ - return SimpleMaxPooling2dSize3x3Stride2x4TestCommon<uint8_t>(workloadFactory, forceNoPadding, 0.1f, 128); -} - -LayerTestResult<float, 4> SimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory) -{ - return SimpleAveragePooling2dTestCommon<float>(workloadFactory); -} - -LayerTestResult<uint8_t, 4> SimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return SimpleAveragePooling2dTestCommon<uint8_t>(workloadFactory, 0.5, -1); -} - -LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory, - bool forceNoPadding) -{ - return IgnorePaddingAveragePooling2dSize3x2Stride2x2TestCommon<float>(workloadFactory, forceNoPadding); -} - -LayerTestResult<float, 4> LargeTensorsAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory) -{ - return LargeTensorsAveragePooling2dTestCommon<float>(workloadFactory); -} - -LayerTestResult<uint8_t, 4> LargeTensorsAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return LargeTensorsAveragePooling2dTestCommon<uint8_t>(workloadFactory, 0.5, -1); -} - -LayerTestResult<float, 4> SimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory) -{ - return SimpleL2Pooling2dTestCommon<float>(workloadFactory); -} - -LayerTestResult<uint8_t, 4> SimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return SimpleL2Pooling2dTestCommon<uint8_t>(workloadFactory); -} - -LayerTestResult<float, 4> L2Pooling2dSize3Stride1Test(armnn::IWorkloadFactory& workloadFactory) -{ - return L2Pooling2dSize3Stride1TestCommon<float>(workloadFactory); -} - -LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride1Uint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return L2Pooling2dSize3Stride1TestCommon<uint8_t>(workloadFactory); -} - -LayerTestResult<float, 4> L2Pooling2dSize3Stride3Test(armnn::IWorkloadFactory& workloadFactory) -{ 
- return L2Pooling2dSize3Stride3TestCommon<float>(workloadFactory); -} - -LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride3Uint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return L2Pooling2dSize3Stride3TestCommon<uint8_t>(workloadFactory); -} - -LayerTestResult<float, 4> L2Pooling2dSize3Stride4Test(armnn::IWorkloadFactory& workloadFactory) -{ - return L2Pooling2dSize3Stride4TestCommon<float>(workloadFactory); -} - -LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride4Uint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return L2Pooling2dSize3Stride4TestCommon<uint8_t>(workloadFactory); -} - -LayerTestResult<float, 4> L2Pooling2dSize7Test(armnn::IWorkloadFactory& workloadFactory) -{ - return L2Pooling2dSize7TestCommon<float>(workloadFactory); -} - -LayerTestResult<uint8_t, 4> L2Pooling2dSize7Uint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return L2Pooling2dSize7TestCommon<uint8_t>(workloadFactory); -} - -LayerTestResult<float, 4> L2Pooling2dSize9Test(armnn::IWorkloadFactory& workloadFactory) -{ - return L2Pooling2dSize9TestCommon<float>(workloadFactory); -} - -LayerTestResult<uint8_t, 4> L2Pooling2dSize9Uint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return L2Pooling2dSize9TestCommon<uint8_t>(workloadFactory); -} - -LayerTestResult<float, 4> AsymmetricNonSquarePooling2dTest(armnn::IWorkloadFactory& workloadFactory) -{ - return AsymmetricNonSquarePooling2dTestCommon<float>(workloadFactory); -} - -LayerTestResult<uint8_t, 4> AsymmetricNonSquarePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return AsymmetricNonSquarePooling2dTestCommon<uint8_t>(workloadFactory); -} - -LayerTestResult<float, 4> ComparePooling2dTest(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory, - armnn::PoolingAlgorithm poolingType) -{ - return ComparePooling2dTestCommon<float>(workloadFactory, refWorkloadFactory, poolingType); -} - -LayerTestResult<uint8_t, 4> ComparePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory, - armnn::PoolingAlgorithm poolingType) -{ - return ComparePooling2dTestCommon<uint8_t>(workloadFactory, refWorkloadFactory, poolingType, 0.1f, 128); -} - -LayerTestResult<float, 2> FullyConnectedLargeTest(armnn::IWorkloadFactory& workloadFactory, - bool transposeWeights) -{ - return FullyConnectedLargeTestCommon<float>(workloadFactory, transposeWeights); -} - -LayerTestResult<float, 4> IgnorePaddingSimpleMaxPooling2dTest(armnn::IWorkloadFactory& workloadFactory) -{ - return IgnorePaddingSimpleMaxPooling2dTestCommon<float>(workloadFactory); -} - -LayerTestResult<uint8_t, 4> IgnorePaddingSimpleMaxPooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return IgnorePaddingSimpleMaxPooling2dTestCommon<uint8_t>(workloadFactory, 1.0f, -5); -} - -LayerTestResult<float, 4> IgnorePaddingMaxPooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory) -{ - return IgnorePaddingMaxPooling2dSize3TestCommon<float>(workloadFactory); -} - -LayerTestResult<uint8_t, 4> IgnorePaddingMaxPooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return IgnorePaddingMaxPooling2dSize3TestCommon<uint8_t>(workloadFactory, 1.0f, -5); -} - -LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory) -{ - return IgnorePaddingSimpleAveragePooling2dTestCommon<float>(workloadFactory); -} - -LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return 
IgnorePaddingSimpleAveragePooling2dTestCommon<uint8_t>(workloadFactory); -} - -LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingTest(armnn::IWorkloadFactory& workloadFactory) -{ - return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon<float>(workloadFactory); -} - -LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test( - armnn::IWorkloadFactory& workloadFactory) -{ - return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon<uint8_t>(workloadFactory); -} - -LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory) -{ - return IgnorePaddingAveragePooling2dSize3TestCommon<float>(workloadFactory); -} - -LayerTestResult<uint8_t, 4> IgnorePaddingAveragePooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return IgnorePaddingAveragePooling2dSize3TestCommon<uint8_t>(workloadFactory); -} - -LayerTestResult<float, 4> IgnorePaddingSimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory) -{ - return IgnorePaddingSimpleL2Pooling2dTestCommon<float>(workloadFactory); -} - -LayerTestResult<uint8_t, 4> IgnorePaddingSimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return IgnorePaddingSimpleL2Pooling2dTestCommon<uint8_t>(workloadFactory); -} - -LayerTestResult<float, 4> IgnorePaddingL2Pooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory) -{ - return IgnorePaddingL2Pooling2dSize3TestCommon<float>(workloadFactory); -} - -LayerTestResult<uint8_t, 4> IgnorePaddingL2Pooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return IgnorePaddingL2Pooling2dSize3TestCommon<uint8_t>(workloadFactory); -} - -LayerTestResult<float, 4> SimplePermuteFloat32Test(armnn::IWorkloadFactory& workloadFactory) -{ - return SimplePermuteFloat32TestCommon(workloadFactory); -}; - -LayerTestResult<uint8_t, 4> SimplePermuteUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - return SimplePermuteUint8TestCommon(workloadFactory); -}; - -LayerTestResult<float, 4> PermuteFloat32ValueSet1Test(armnn::IWorkloadFactory& workloadFactory) -{ - return PermuteFloat32ValueSet1TestCommon(workloadFactory); -}; - -LayerTestResult<float, 4> PermuteFloat32ValueSet2Test(armnn::IWorkloadFactory& workloadFactory) -{ - return PermuteFloat32ValueSet2TestCommon(workloadFactory); -}; - -LayerTestResult<float, 4> PermuteFloat32ValueSet3Test(armnn::IWorkloadFactory& workloadFactory) -{ - return PermuteFloat32ValueSet3TestCommon(workloadFactory); -};
\ No newline at end of file diff --git a/src/armnn/backends/test/LayerTests.hpp b/src/armnn/backends/test/LayerTests.hpp deleted file mode 100644 index 365a1f53d4..0000000000 --- a/src/armnn/backends/test/LayerTests.hpp +++ /dev/null @@ -1,345 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include "armnn/ArmNN.hpp" -#include "armnn/Tensor.hpp" -#include "Half.hpp" - -#include <boost/multi_array.hpp> -#include <boost/assert.hpp> -#include <array> - -// Layer callables. - -namespace armnn -{ -class IWorkloadFactory; -} - -template <std::size_t n> -boost::array<unsigned int, n> GetTensorShapeAsArray(const armnn::TensorInfo& tensorInfo) -{ - BOOST_ASSERT_MSG(n == tensorInfo.GetNumDimensions(), - "Attempting to construct a shape array of mismatching size"); - - boost::array<unsigned int, n> shape; - for (unsigned int i = 0; i < n; i++) - { - shape[i] = tensorInfo.GetShape()[i]; - } - return shape; -} - -template <typename T, std::size_t n> -struct LayerTestResult -{ - LayerTestResult(const armnn::TensorInfo& outputInfo) - { - auto shape( GetTensorShapeAsArray<n>(outputInfo) ); - output.resize(shape); - outputExpected.resize(shape); - supported = true; - } - - boost::multi_array<T, n> output; - boost::multi_array<T, n> outputExpected; - bool supported; -}; - -LayerTestResult<float, 4> SimpleConvolution2d3x5Test(armnn::IWorkloadFactory& workloadFactory, - bool biasEnabled); - -LayerTestResult<float, 4> SimpleConvolution2d3x3Test(armnn::IWorkloadFactory& workloadFactory, - bool biasEnabled); - -LayerTestResult<float, 4> -Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(armnn::IWorkloadFactory& workloadFactory); - - -LayerTestResult<float, 4> Convolution1dTest(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled); -LayerTestResult<uint8_t, 4> Convolution1dUint8Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled); - -LayerTestResult<float, 4> DepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled); - -LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(armnn::IWorkloadFactory& workloadFactory, - bool biasEnabled); - -LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(armnn::IWorkloadFactory& workloadFactory, - bool biasEnabled); - -LayerTestResult<float, 4> SimpleMaxPooling2dSize2x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory, - bool forceNoPadding); -LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize2x2Stride2x2Uint8Test(armnn::IWorkloadFactory& workloadFactory, - bool forceNoPadding); -LayerTestResult<float, 4> SimpleMaxPooling2dSize3x3Stride2x4Test(armnn::IWorkloadFactory& workloadFactory, - bool forceNoPadding); -LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize3x3Stride2x4Uint8Test(armnn::IWorkloadFactory& workloadFactory, - bool forceNoPadding ); -LayerTestResult<float, 4> IgnorePaddingSimpleMaxPooling2dTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> IgnorePaddingSimpleMaxPooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> IgnorePaddingMaxPooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> IgnorePaddingMaxPooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> SimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> 
SimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory, - bool forceNoPadding); -LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test( - armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> IgnorePaddingAveragePooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> SimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> SimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> L2Pooling2dSize3Stride1Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride1Uint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> L2Pooling2dSize3Stride3Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride3Uint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> L2Pooling2dSize3Stride4Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride4Uint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> L2Pooling2dSize7Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> L2Pooling2dSize7Uint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> L2Pooling2dSize9Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> L2Pooling2dSize9Uint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> LargeTensorsAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> LargeTensorsAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> IgnorePaddingSimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> IgnorePaddingSimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> IgnorePaddingL2Pooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> IgnorePaddingL2Pooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> AsymmetricNonSquarePooling2dTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> AsymmetricNonSquarePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> ComparePooling2dTest(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory, - armnn::PoolingAlgorithm poolingType); -LayerTestResult<uint8_t, 4> ComparePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory, - armnn::PoolingAlgorithm poolingType); - -LayerTestResult<float, 4> ConstantLinearActivationTest(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> SimpleNormalizationAcrossTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> 
SimpleNormalizationWithinTest(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 2> SimpleSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, float beta); -LayerTestResult<uint8_t, 2> SimpleSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory, float beta); - -LayerTestResult<float, 4> SimpleSigmoidTest(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> SimpleReshapeFloat32Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> SimpleReshapeUint8Test(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> SimpleFloorTest(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 1> Concatenation1dTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 2> Concatenation2dDim0Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 2> Concatenation2dDim1Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 2> Concatenation2dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 2> Concatenation2dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 3> Concatenation3dDim0Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 3> Concatenation3dDim1Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 3> Concatenation3dDim2Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 3> Concatenation3dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 3> Concatenation3dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 3> Concatenation3dDim2DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<uint8_t, 4> SimpleSigmoidUint8Test(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> CompareConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory); - -template<typename T> -LayerTestResult<T, 4> CompareDepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory); - -LayerTestResult<float, 4> CompareNormalizationTest(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory, - armnn::NormalizationAlgorithmChannel normChannel, - armnn::NormalizationAlgorithmMethod normMethod); - -LayerTestResult<float, 2> CompareSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory, float beta); - -LayerTestResult<float, 2> FullyConnectedFloat32Test(armnn::IWorkloadFactory& workloadFactory, - bool biasEnabled, - bool transposeWeights); - -std::vector<LayerTestResult<float, 3>> SplitterTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 3> CopyViaSplitterTest(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 3> MergerTest(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> AdditionTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> AdditionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> AdditionBroadcastTest(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> CompareAdditionTest(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory); - -LayerTestResult<float, 4> SubtractionTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> 
SubtractionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> SubtractionBroadcastTest(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> CompareActivationTest(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory, - armnn::ActivationFunction f, - unsigned int batchSize); - -LayerTestResult<float, 4> DivisionTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> DivisionByZeroTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> DivisionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> DivisionBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> MultiplicationTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> MultiplicationBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> MultiplicationBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> CompareMultiplicationTest(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory); - -LayerTestResult<float, 4> BatchNormTest(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> CompareBatchNormTest(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory); - -LayerTestResult<float, 4> BoundedReLuUpperAndLowerBoundTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> BoundedReLuUint8UpperAndLowerBoundTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> BoundedReLuUpperBoundOnlyTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> BoundedReLuUint8UpperBoundOnlyTest(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> CompareBoundedReLuTest(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory, - float upperBound, - float lowerBound); - -// Tests that the output should be identical to the input when the output dimensions match the input ones. -LayerTestResult<float, 4> ResizeBilinearNopTest(armnn::IWorkloadFactory& workloadFactory); - -// Tests the behaviour of the resize bilinear operation when rescaling a 2x2 image into a 1x1 image. -LayerTestResult<float, 4> SimpleResizeBilinearTest(armnn::IWorkloadFactory& workloadFactory); - -// Tests the resize bilinear for minification of a square input matrix (also: input dimensions are a -// multiple of output dimensions). -LayerTestResult<float, 4> ResizeBilinearSqMinTest(armnn::IWorkloadFactory& workloadFactory); - -// Tests the resize bilinear for minification (output dimensions smaller than input dimensions). -LayerTestResult<float, 4> ResizeBilinearMinTest(armnn::IWorkloadFactory& workloadFactory); - -// Tests the resize bilinear for magnification (output dimensions bigger than input dimensions). 
-LayerTestResult<float, 4> ResizeBilinearMagTest(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> BatchNormTest(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 2> FakeQuantizationTest(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> L2Normalization1dTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> L2Normalization2dTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> L2Normalization3dTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> L2Normalization4dTest(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> ConstantTest(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<uint8_t, 4> ConstantTestUint8(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<uint8_t, 4> BoundedReLuUint8Test(armnn::IWorkloadFactory& workloadFactory, float upperBound); -LayerTestResult<uint8_t, 4> BoundedReLuUint8Test(armnn::IWorkloadFactory& workloadFactory, - float upperBound, - float lowerBound); - -LayerTestResult<uint8_t, 2> FullyConnectedUint8Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled); - -std::vector<LayerTestResult<uint8_t, 3>> SplitterUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 3> CopyViaSplitterUint8Test(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<uint8_t, 3> MergerUint8Test(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<uint8_t, 4> AdditionUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> AdditionBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> AdditionBroadcastUint8Test(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<uint8_t, 4> SubtractionUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> SubtractionBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> SubtractionBroadcastUint8Test(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<uint8_t, 4> CompareActivationUint8Test(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory, - armnn::ActivationFunction f); - -LayerTestResult<uint8_t, 2> CompareSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory, - float beta); - -LayerTestResult<uint8_t, 4> MultiplicationUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> MultiplicationBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> MultiplicationBroadcast1DVectorUint8Test(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<uint8_t, 4> DivisionUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> DivisionBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> DivisionBroadcast1DVectorUint8Test(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(armnn::IWorkloadFactory& workloadFactory, - bool biasEnabled); - -LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(armnn::IWorkloadFactory& workloadFactory, - bool biasEnabled); - -LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(armnn::IWorkloadFactory& workloadFactory, - bool biasEnabled); - -LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(armnn::IWorkloadFactory& workloadFactory, - bool 
biasEnabled); - -LayerTestResult<uint8_t, 4> ConstantLinearActivationUint8Test(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<uint8_t, 4> ResizeBilinearNopUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> SimpleResizeBilinearUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> ResizeBilinearSqMinUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> ResizeBilinearMinUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> ResizeBilinearMagUint8Test(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<uint8_t, 4> BatchNormUint8Test(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<uint8_t, 4> ConstantUint8Test(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<uint8_t, 1> Concatenation1dUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 2> Concatenation2dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 2> Concatenation2dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 2> Concatenation2dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 2> Concatenation2dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 3> Concatenation3dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 3> Concatenation3dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 3> Concatenation3dDim2Uint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 3> Concatenation3dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 3> Concatenation3dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 3> Concatenation3dDim2DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory); - - -LayerTestResult<float, 2> FullyConnectedLargeTest(armnn::IWorkloadFactory& workloadFactory, - bool transposeWeights); -LayerTestResult<float, 4> SimplePermuteFloat32Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<uint8_t, 4> SimplePermuteUint8Test(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> PermuteFloat32ValueSet1Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> PermuteFloat32ValueSet2Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 4> PermuteFloat32ValueSet3Test(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 2> LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest - (armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 2> - LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<float, 2> -LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest(armnn::IWorkloadFactory& workloadFactory); - -LayerTestResult<float, 4> SimpleConvertFp16ToFp32Test(armnn::IWorkloadFactory& workloadFactory); -LayerTestResult<armnn::Half, 4> SimpleConvertFp32ToFp16Test(armnn::IWorkloadFactory& workloadFactory); diff --git a/src/armnn/backends/test/LstmTestImpl.hpp b/src/armnn/backends/test/LstmTestImpl.hpp deleted file mode 100644 index 2c4e166084..0000000000 --- a/src/armnn/backends/test/LstmTestImpl.hpp +++ /dev/null @@ -1,1150 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#pragma once - -#include <armnn/ArmNN.hpp> -#include <armnn/Tensor.hpp> -#include <armnn/TypesUtils.hpp> - -#include "test/TensorHelpers.hpp" -#include "QuantizeHelper.hpp" - -#include "backends/CpuTensorHandle.hpp" -#include <backends/WorkloadInfo.hpp> -#include "backends/WorkloadFactory.hpp" - -LayerTestResult<float, 2> LstmNoCifgNoPeepholeNoProjectionTestImpl(armnn::IWorkloadFactory& workloadFactory, - const boost::multi_array<float, 2>& input, - const boost::multi_array<float, 2>& outputExpected) -{ - unsigned int batchSize = boost::numeric_cast<unsigned int>(input.shape()[0]); - unsigned int inputSize = boost::numeric_cast<unsigned int>(input.shape()[1]); - unsigned int outputSize = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]); - // cellSize and outputSize have the same size when there is no projection. - unsigned numUnits = outputSize; - - - armnn::TensorInfo inputTensorInfo({batchSize , inputSize}, armnn::GetDataType<float>()); - armnn::TensorInfo cellStateInTensorInfo({batchSize , numUnits}, armnn::GetDataType<float>()); - armnn::TensorInfo outputStateInTensorInfo({batchSize , outputSize}, armnn::GetDataType<float>()); - - - armnn::TensorInfo scratchBufferTensorInfo({batchSize, numUnits * 3}, armnn::GetDataType<float>()); - armnn::TensorInfo cellStateOutTensorInfo({batchSize, numUnits}, armnn::GetDataType<float>()); - armnn::TensorInfo outputStateOutTensorInfo({batchSize, outputSize}, armnn::GetDataType<float>()); - armnn::TensorInfo outputTensorInfo({batchSize, outputSize}, armnn::GetDataType<float>()); - - - LayerTestResult<float, 2> ret(outputTensorInfo); - - std::vector<float> inputVector; - inputVector.assign(input.data(), input.data() + (batchSize * inputSize)); - auto inputTensor = MakeTensor<float,2>(inputTensorInfo, inputVector); - - std::vector<float> cellStateInVector(batchSize * numUnits, 0.f); - auto cellStateInTensor = MakeTensor<float,2>(cellStateInTensorInfo, cellStateInVector); - - std::vector<float> outputStateInVector(batchSize * outputSize, 0.f); - auto outputStateInTensor = MakeTensor<float,2>(outputStateInTensorInfo, outputStateInVector); - - std::vector<float> scratchBufferVector(batchSize * numUnits * 3, 0.f); - auto scratchBufferTensor = MakeTensor<float,2>(scratchBufferTensorInfo, scratchBufferVector); - - std::vector<float> outputStateOutVector(batchSize * outputSize, 0.f); - auto outputStateOutTensor = MakeTensor<float,2>(outputStateOutTensorInfo, outputStateOutVector); - - std::vector<float> cellStateOutVector(batchSize * numUnits, 0.f); - auto cellStateOutTensor = MakeTensor<float,2>(cellStateOutTensorInfo, cellStateOutVector); - - std::vector<float> outputVector; - outputVector.assign(outputExpected.data(), outputExpected.data() + (batchSize * outputSize)); - ret.outputExpected = MakeTensor<float, 2>(outputTensorInfo, outputVector); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> cellStateInHandle = - workloadFactory.CreateTensorHandle(cellStateInTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputStateInHandle = - workloadFactory.CreateTensorHandle(outputStateInTensorInfo); - - std::unique_ptr<armnn::ITensorHandle> scratchHandle = workloadFactory.CreateTensorHandle(scratchBufferTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputStateOutHandle = - workloadFactory.CreateTensorHandle(outputStateOutTensorInfo); - std::unique_ptr<armnn::ITensorHandle> cellStateOutHandle = - 
workloadFactory.CreateTensorHandle(cellStateOutTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - - armnn::LstmQueueDescriptor data; - armnn::WorkloadInfo info; - - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddInputToWorkload(data, info, outputStateInTensorInfo, outputStateInHandle.get()); - AddInputToWorkload(data, info, cellStateInTensorInfo, cellStateInHandle.get()); - - AddOutputToWorkload(data, info, scratchBufferTensorInfo, scratchHandle.get()); - AddOutputToWorkload(data, info, outputStateOutTensorInfo, outputStateOutHandle.get()); - AddOutputToWorkload(data, info, cellStateOutTensorInfo, cellStateOutHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - armnn::TensorInfo tensorInfo4({numUnits}, armnn::GetDataType<float>()); - armnn::TensorInfo tensorInfo8({numUnits, 2}, armnn::GetDataType<float>()); - armnn::TensorInfo tensorInfo16({numUnits, 4}, armnn::GetDataType<float>()); - - auto inputToInputWeights = MakeTensor<float, 2>(tensorInfo8, {-0.45018822f, -0.02338299f, -0.0870589f, - -0.34550029f, 0.04266912f, -0.15680569f, - -0.34856534f, 0.43890524f}); - - auto inputToForgetWeights = MakeTensor<float, 2>(tensorInfo8, {0.09701663f, 0.20334584f, -0.50592935f, - -0.31343272f, -0.40032279f, 0.44781327f, - 0.01387155f, -0.35593212f}); - - auto inputToCellWeights = MakeTensor<float, 2>(tensorInfo8, {-0.50013041f, 0.1370284f, 0.11810488f, 0.2013163f, - -0.20583314f, 0.44344562f, 0.22077113f, - -0.29909778f}); - - auto inputToOutputWeights = MakeTensor<float, 2>(tensorInfo8, {-0.25065863f, -0.28290087f, 0.04613829f, - 0.40525138f, 0.44272184f, 0.03897077f, - -0.1556896f, 0.19487578f}); - - auto recurrentToInputWeights = MakeTensor<float, 2>(tensorInfo16, {-0.0063535f, -0.2042388f, 0.31454784f, - -0.35746509f, 0.28902304f, 0.08183324f, - -0.16555229f, 0.02286911f, -0.13566875f, - 0.03034258f, 0.48091322f, -0.12528998f, - 0.24077177f, -0.51332325f, -0.33502164f, - 0.10629296f}); - - auto recurrentToForgetWeights = MakeTensor<float, 2>(tensorInfo16, {-0.48684245f, -0.06655136f, 0.42224967f, - 0.2112639f, 0.27654213f, 0.20864892f, - -0.07646349f, 0.45877004f, 0.00141793f, - -0.14609534f, 0.36447752f, 0.09196436f, - 0.28053468f, 0.01560611f, -0.20127171f, - -0.01140004f}); - - auto recurrentToCellWeights = MakeTensor<float, 2>(tensorInfo16, {-0.3407414f, 0.24443203f, -0.2078532f, - 0.26320225f, 0.05695659f, -0.00123841f, - -0.4744786f, -0.35869038f, -0.06418842f, - -0.13502428f, -0.501764f, 0.22830659f, - -0.46367589f, 0.26016325f, -0.03894562f, - -0.16368064f}); - - auto recurrentToOutputWeights = MakeTensor<float, 2>(tensorInfo16, {0.43385774f, -0.17194885f, 0.2718237f, - 0.09215671f, 0.24107647f, -0.39835793f, - 0.18212086f, 0.01301402f, 0.48572797f, - -0.50656658f, 0.20047462f, -0.20607421f, - -0.51818722f, -0.15390486f, 0.0468148f, - 0.39922136f}); - - auto cellToInputWeights = MakeTensor<float, 1>(tensorInfo4, {0., 0., 0., 0.}); - - auto inputGateBias = MakeTensor<float, 1>(tensorInfo4, {0., 0., 0., 0.}); - - auto forgetGateBias = MakeTensor<float, 1>(tensorInfo4, {1., 1., 1., 1.}); - - auto cellBias = MakeTensor<float, 1>(tensorInfo4, {0., 0., 0., 0.}); - - auto outputGateBias = MakeTensor<float, 1>(tensorInfo4, {0., 0., 0., 0.}); - - armnn::ScopedCpuTensorHandle inputToInputWeightsTensor(tensorInfo8); - armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(tensorInfo8); - armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(tensorInfo8); - 
armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(tensorInfo8); - armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(tensorInfo16); - armnn::ScopedCpuTensorHandle recurrentToInputWeightsTensor(tensorInfo16); - armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(tensorInfo16); - armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(tensorInfo16); - armnn::ScopedCpuTensorHandle cellToInputWeightsTensor(tensorInfo4); - armnn::ScopedCpuTensorHandle inputGateBiasTensor(tensorInfo4); - armnn::ScopedCpuTensorHandle forgetGateBiasTensor(tensorInfo4); - armnn::ScopedCpuTensorHandle cellBiasTensor(tensorInfo4); - armnn::ScopedCpuTensorHandle outputGateBiasTensor(tensorInfo4); - - AllocateAndCopyDataToITensorHandle(&inputToInputWeightsTensor, &inputToInputWeights[0][0]); - AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]); - AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]); - AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]); - AllocateAndCopyDataToITensorHandle(&recurrentToInputWeightsTensor, &recurrentToInputWeights[0][0]); - AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]); - AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]); - AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]); - AllocateAndCopyDataToITensorHandle(&cellToInputWeightsTensor, &cellToInputWeights[0]); - AllocateAndCopyDataToITensorHandle(&inputGateBiasTensor, &inputGateBias[0]); - AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]); - AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]); - AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]); - - data.m_InputToInputWeights = &inputToInputWeightsTensor; - data.m_InputToForgetWeights = &inputToForgetWeightsTensor; - data.m_InputToCellWeights = &inputToCellWeightsTensor; - data.m_InputToOutputWeights = &inputToOutputWeightsTensor; - data.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor; - data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor; - data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor; - data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor; - data.m_CellToInputWeights = &cellToInputWeightsTensor; - data.m_InputGateBias = &inputGateBiasTensor; - data.m_ForgetGateBias = &forgetGateBiasTensor; - data.m_CellBias = &cellBiasTensor; - data.m_OutputGateBias = &outputGateBiasTensor; - - - // Flags to set test configuration - data.m_Parameters.m_ActivationFunc = 4; - data.m_Parameters.m_CifgEnabled = false; - data.m_Parameters.m_PeepholeEnabled = false; - data.m_Parameters.m_ProjectionEnabled = false; - - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateLstm(data, info); - inputHandle->Allocate(); - outputStateInHandle->Allocate(); - cellStateInHandle->Allocate(); - - scratchHandle->Allocate(); - outputStateOutHandle->Allocate(); - cellStateOutHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]); - CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]); - CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); - - return ret; -} - - 
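[Editorial aside, not part of the deleted file.] The helper above drives ArmNN's LSTM workload directly: three inputs (input, output state in, cell state in) and four outputs (scratch buffer, output state out, cell state out, output) are bound to the LstmQueueDescriptor before CreateLstm is invoked, and the constant weights/biases are supplied through ScopedCpuTensorHandle members of the descriptor. A wrapper such as the LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest declared in the header earlier in this diff would typically only build the input and expected-output tensors and forward them to this impl. The sketch below is illustrative: the numeric values are placeholders (the real test supplies reference data computed offline); only the shapes — a {2, 2} input and a {2, 4} expected output — follow from the hard-coded weights in the impl (inputSize 2, numUnits = outputSize = 4).

// Illustrative sketch only; literal values are placeholders, not the real test vectors.
LayerTestResult<float, 2> LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest(
    armnn::IWorkloadFactory& workloadFactory)
{
    // Batch size 2, input size 2 -- consistent with the {numUnits, 2} input weights above.
    armnn::TensorInfo inputDesc({2, 2}, armnn::GetDataType<float>());
    boost::multi_array<float, 2> input = MakeTensor<float, 2>(
        inputDesc, std::vector<float>{2.0f, 3.0f, 3.0f, 4.0f}); // placeholder input data

    // Batch size 2, output size 4 -- consistent with the {numUnits, 4} recurrent weights above.
    armnn::TensorInfo outputDesc({2, 4}, armnn::GetDataType<float>());
    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(
        outputDesc, std::vector<float>(2 * 4, 0.0f)); // placeholder expected values

    // Delegate to the implementation above; it creates and executes the workload and
    // returns the actual vs. expected output for the test framework to compare.
    return LstmNoCifgNoPeepholeNoProjectionTestImpl(workloadFactory, input, expectedOutput);
}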
-LayerTestResult<float, 2> -LstmLayerFloat32NoCifgWithPeepholeWithProjectionTestImpl(armnn::IWorkloadFactory& workloadFactory, - const boost::multi_array<float, 2>& input, - const boost::multi_array<float, 2>& outputExpected) { - - unsigned int batchSize = 2; - unsigned int outputSize = 16; - unsigned int inputSize = 5; - unsigned numUnits = 20; - - armnn::TensorInfo inputTensorInfo({batchSize , inputSize}, armnn::GetDataType<float>()); - armnn::TensorInfo cellStateInTensorInfo({batchSize , numUnits}, armnn::GetDataType<float>()); - armnn::TensorInfo outputStateInTensorInfo({batchSize , outputSize}, armnn::GetDataType<float>()); - - // Scratch buffer size without CIFG [batchSize, numUnits * 3] - armnn::TensorInfo scratchBufferTensorInfo({batchSize, numUnits * 3}, armnn::GetDataType<float>()); - armnn::TensorInfo cellStateOutTensorInfo({batchSize, numUnits}, armnn::GetDataType<float>()); - armnn::TensorInfo outputStateOutTensorInfo({batchSize, outputSize}, armnn::GetDataType<float>()); - armnn::TensorInfo outputTensorInfo({batchSize, outputSize}, armnn::GetDataType<float>()); - - LayerTestResult<float, 2> ret(outputTensorInfo); - - std::vector<float> inputVector; - inputVector.assign(input.data(), input.data() + (batchSize * inputSize)); - auto inputTensor = MakeTensor<float,2>(inputTensorInfo, inputVector); - - std::vector<float> cellStateInVector(batchSize * numUnits, 0.f); - auto cellStateInTensor = MakeTensor<float,2>(cellStateInTensorInfo, cellStateInVector); - - std::vector<float> outputStateInVector(batchSize * outputSize, 0.f); - auto outputStateInTensor = MakeTensor<float,2>(outputStateInTensorInfo, outputStateInVector); - - std::vector<float> scratchBufferVector(batchSize * numUnits * 3, 0.f); - auto scratchBufferTensor = MakeTensor<float,2>(scratchBufferTensorInfo, scratchBufferVector); - - std::vector<float> outputStateOutVector(batchSize * outputSize, 0.f); - auto outputStateOutTensor = MakeTensor<float,2>(outputStateOutTensorInfo, outputStateOutVector); - - std::vector<float> cellStateOutVector(batchSize * numUnits, 0.f); - auto cellStateOutTensor = MakeTensor<float,2>(cellStateOutTensorInfo, cellStateOutVector); - - std::vector<float> outputVector; - outputVector.assign(outputExpected.data(), outputExpected.data() + (batchSize * outputSize)); - ret.outputExpected = MakeTensor<float, 2>(outputTensorInfo, outputVector); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> cellStateInHandle = - workloadFactory.CreateTensorHandle(cellStateInTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputStateInHandle = - workloadFactory.CreateTensorHandle(outputStateInTensorInfo); - - std::unique_ptr<armnn::ITensorHandle> scratchHandle = workloadFactory.CreateTensorHandle(scratchBufferTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputStateOutHandle = - workloadFactory.CreateTensorHandle(outputStateOutTensorInfo); - std::unique_ptr<armnn::ITensorHandle> cellStateOutHandle = - workloadFactory.CreateTensorHandle(cellStateOutTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::LstmQueueDescriptor data; - armnn::WorkloadInfo info; - - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddInputToWorkload(data, info, outputStateInTensorInfo, outputStateInHandle.get()); - AddInputToWorkload(data, info, cellStateInTensorInfo, cellStateInHandle.get()); - - AddOutputToWorkload(data, info, 
scratchBufferTensorInfo, scratchHandle.get()); - AddOutputToWorkload(data, info, outputStateOutTensorInfo, outputStateOutHandle.get()); - AddOutputToWorkload(data, info, cellStateOutTensorInfo, cellStateOutHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - armnn::TensorInfo tensorInfo16({outputSize}, armnn::GetDataType<float>()); - armnn::TensorInfo tensorInfo20({numUnits}, armnn::GetDataType<float>()); - armnn::TensorInfo tensorInfo20x5({numUnits, inputSize}, armnn::GetDataType<float>()); - armnn::TensorInfo tensorInfo20x16({numUnits, outputSize}, armnn::GetDataType<float>()); - armnn::TensorInfo tensorInfo16x20({outputSize, numUnits}, armnn::GetDataType<float>()); - - auto inputToInputWeights = - MakeTensor<float, 2>(tensorInfo20x5, {0.021393683f,0.06124551f, 0.046905167f,-0.014657677f,-0.03149463f, - 0.09171803f, 0.14647801f,0.10797193f, -0.0057968358f,0.0019193048f, - -0.2726754f, 0.10154029f, -0.018539885f, 0.080349885f, -0.10262385f, - -0.022599787f,-0.09121155f, -0.008675967f, -0.045206103f,-0.0821282f, - -0.008045952f,0.015478081f, 0.055217247f, 0.038719587f, 0.044153627f, - -0.06453243f,0.05031825f, -0.046935108f, -0.008164439f, 0.014574226f, - -0.1671009f, -0.15519552f, -0.16819797f,-0.13971269f,-0.11953059f, - 0.25005487f, -0.22790983f, 0.009855087f, -0.028140958f, -0.11200698f, - 0.11295408f, -0.0035217577f, 0.054485075f, 0.05184695f, 0.064711206f, - 0.10989193f, 0.11674786f, 0.03490607f, 0.07727357f, 0.11390585f, - -0.1863375f, -0.1034451f, -0.13945189f, -0.049401227f, -0.18767063f, - 0.042483903f, 0.14233552f, 0.13832581f, 0.18350165f, 0.14545603f, - -0.028545704f,0.024939531f,0.050929718f,0.0076203286f,-0.0029723682f, - -0.042484224f, -0.11827596f, -0.09171104f, -0.10808628f,-0.16327988f, - -0.2273378f, -0.0993647f, -0.017155107f,0.0023917493f,0.049272764f, - 0.0038534778f, 0.054764505f, 0.089753784f, 0.06947234f, 0.08014476f, - -0.04544234f, -0.0497073f,-0.07135631f, -0.048929106f,-0.004042012f, - -0.009284026f, 0.018042054f, 0.0036860977f,-0.07427302f, -0.11434604f, - -0.018995456f, 0.031487543f, 0.012834908f,0.019977754f,0.044256654f, - -0.39292613f, -0.18519334f, -0.11651281f,-0.06809892f, 0.011373677f - }); - - auto inputToForgetWeights = - MakeTensor<float, 2>(tensorInfo20x5, {-0.0018401089f, -0.004852237f,0.03698424f, 0.014181704f,0.028273236f, - -0.016726194f, -0.05249759f,-0.10204261f, 0.00861066f,-0.040979505f, - -0.009899187f,0.01923892f,-0.028177269f, -0.08535103f,-0.14585495f, - 0.10662567f,-0.01909731f,-0.017883534f,-0.0047269356f,-0.045103323f, - 0.0030784295f,0.076784775f,0.07463696f, 0.094531395f,0.0814421f, - -0.12257899f, -0.033945758f,-0.031303465f, 0.045630626f,0.06843887f, - -0.13492945f, -0.012480007f,-0.0811829f, -0.07224499f,-0.09628791f, - 0.045100946f,0.0012300825f, 0.013964662f, 0.099372394f,0.02543059f, - 0.06958324f, 0.034257296f, 0.0482646f, 0.06267997f,0.052625068f, - 0.12784666f, 0.07077897f, 0.025725935f, 0.04165009f,0.07241905f, - 0.018668644f, -0.037377294f,-0.06277783f,-0.08833636f,-0.040120605f, - -0.011405586f,-0.007808335f,-0.010301386f,-0.005102167f,0.027717464f, - 0.05483423f, 0.11449111f, 0.11289652f,0.10939839f, 0.13396506f, - -0.08402166f,-0.01901462f, -0.044678304f,-0.07720565f,0.014350063f, - -0.11757958f, -0.0652038f, -0.08185733f,-0.076754324f,-0.092614375f, - 0.10405491f, 0.052960336f, 0.035755895f,0.035839386f,-0.012540553f, - 0.036881298f, 0.02913376f, 0.03420159f,0.05448447f,-0.054523353f, - 0.02582715f, 0.02327355f, -0.011857179f,-0.0011980024f,-0.034641717f, - 
-0.026125094f,-0.17582615f,-0.15923657f,-0.27486774f,-0.0006143371f, - 0.0001771948f, -8.470171e-05f, 0.02651807f,0.045790765f,0.06956496f - }); - - auto inputToCellWeights = - MakeTensor<float, 2>(tensorInfo20x5, {-0.04580283f, -0.09549462f, -0.032418985f, -0.06454633f, - -0.043528453f, 0.043018587f, -0.049152344f, -0.12418144f, - -0.078985475f, -0.07596889f, 0.019484362f, -0.11434962f, - -0.0074034138f, -0.06314844f, -0.092981495f, 0.0062155537f, - -0.025034338f, -0.0028890965f, 0.048929527f, 0.06235075f, - 0.10665918f, -0.032036792f, -0.08505916f, -0.10843358f, - -0.13002433f, -0.036816437f, -0.02130134f, -0.016518239f, - 0.0047691227f, -0.0025825808f, 0.066017866f, 0.029991534f, - -0.10652836f, -0.1037554f, -0.13056071f, -0.03266643f, - -0.033702414f, -0.006473424f, -0.04611692f, 0.014419339f, - -0.025174323f, 0.0396852f, 0.081777506f, 0.06157468f, - 0.10210095f, -0.009658194f, 0.046511717f, 0.03603906f, - 0.0069369148f, 0.015960095f, -0.06507666f, 0.09551598f, - 0.053568836f, 0.06408714f, 0.12835667f, -0.008714329f, - -0.20211966f, -0.12093674f, 0.029450472f, 0.2849013f, - -0.029227901f, 0.1164364f, -0.08560263f, 0.09941786f, - -0.036999565f, -0.028842626f, -0.0033637602f, -0.017012902f, - -0.09720865f, -0.11193351f, -0.029155117f, -0.017936034f, - -0.009768936f, -0.04223324f, -0.036159635f, 0.06505112f, - -0.021742892f, -0.023377212f, -0.07221364f, -0.06430552f, - 0.05453865f, 0.091149814f, 0.06387331f, 0.007518393f, - 0.055960953f, 0.069779344f, 0.046411168f, 0.10509911f, - 0.07463894f, 0.0075130584f, 0.012850982f, 0.04555431f, - 0.056955688f, 0.06555285f, 0.050801456f, -0.009862683f, - 0.00826772f, -0.026555609f, -0.0073611983f, -0.0014897042f - }); - - auto inputToOutputWeights = - MakeTensor<float, 2>(tensorInfo20x5, {-0.0998932f, -0.07201956f, -0.052803773f,-0.15629593f,-0.15001918f, - -0.07650751f,0.02359855f, -0.075155355f, -0.08037709f, -0.15093534f, - 0.029517552f, -0.04751393f, 0.010350531f,-0.02664851f, -0.016839722f, - -0.023121163f, 0.0077019283f, 0.012851257f, -0.05040649f,-0.0129761f, - -0.021737747f,-0.038305793f,-0.06870586f, -0.01481247f,-0.001285394f, - 0.10124236f, 0.083122835f, 0.053313006f,-0.062235646f,-0.075637154f, - -0.027833903f, 0.029774971f, 0.1130802f, 0.09218906f, 0.09506135f, - -0.086665764f,-0.037162706f,-0.038880914f,-0.035832845f,-0.014481564f, - -0.09825003f,-0.12048569f,-0.097665586f,-0.05287633f, -0.0964047f, - -0.11366429f, 0.035777505f, 0.13568819f, 0.052451383f,0.050649304f, - 0.05798951f, -0.021852335f,-0.099848844f,0.014740475f,-0.078897946f, - 0.04974699f, 0.014160473f, 0.06973932f, 0.04964942f, 0.033364646f, - 0.08190124f, 0.025535367f, 0.050893165f, 0.048514254f,0.06945813f, - -0.078907564f,-0.06707616f, -0.11844508f, -0.09986688f,-0.07509403f, - 0.06263226f, 0.14925587f, 0.20188436f, 0.12098451f,0.14639415f, - 0.0015017595f, -0.014267382f, -0.03417257f,0.012711468f,0.0028300495f, - -0.024758482f, -0.05098548f,-0.0821182f, 0.014225672f, 0.021544158f, - 0.08949725f, 0.07505268f, -0.0020780868f, 0.04908258f,0.06476295f, - -0.022907063f,0.027562456f,0.040185735f, 0.019567577f,-0.015598739f, - -0.049097303f, -0.017121866f, -0.083368234f,-0.02332002f,-0.0840956f - }); - - auto inputGateBias = - MakeTensor<float, 1>(tensorInfo20, {0.02234832f, 0.14757581f, 0.18176508f, 0.10380666f, 0.053110216f, - -0.06928846f, -0.13942584f, -0.11816189f, 0.19483899f, 0.03652339f, - -0.10250295f, 0.036714908f, -0.18426876f, 0.036065217f, 0.21810818f, - 0.02383196f, -0.043370757f, 0.08690144f, -0.04444982f, 0.00030581196f - }); - - auto forgetGateBias = - 
MakeTensor<float, 1>(tensorInfo20, {0.035185695f, -0.042891346f, -0.03032477f, 0.23027696f, - 0.11098921f, 0.15378423f, 0.09263801f, 0.09790885f, - 0.09508917f, 0.061199076f, 0.07665568f, -0.015443159f, - -0.03499149f, 0.046190713f, 0.08895977f, 0.10899629f, - 0.40694186f, 0.06030037f, 0.012413437f, -0.06108739f - }); - - auto cellBias = - MakeTensor<float, 1>(tensorInfo20, {-0.024379363f, 0.0055531194f, 0.23377132f, 0.033463873f, - -0.1483596f, -0.10639995f, -0.091433935f, 0.058573797f, - -0.06809782f, -0.07889636f, -0.043246906f, -0.09829136f, - -0.4279842f, 0.034901652f, 0.18797937f, 0.0075234566f, - 0.016178843f, 0.1749513f, 0.13975595f, 0.92058027f - }); - - auto outputGateBias = - MakeTensor<float, 1>(tensorInfo20, {0.046159424f, -0.0012809046f, 0.03563469f, 0.12648113f, 0.027195795f, - 0.35373217f, -0.018957434f, 0.008907322f, -0.0762701f, 0.12018895f, - 0.04216877f, 0.0022856654f, 0.040952638f, 0.3147856f, 0.08225149f, - -0.057416286f, -0.14995944f, -0.008040261f, 0.13208859f, 0.029760877f - }); - - auto recurrentToInputWeights = - MakeTensor<float, 2>(tensorInfo20x16, {-0.001374326f, -0.078856036f, 0.10672688f, 0.029162422f, - -0.11585556f, 0.02557986f, -0.13446963f, -0.035785314f, - -0.01244275f, 0.025961924f, -0.02337298f, -0.044228926f, - -0.055839065f, -0.046598054f, -0.010546039f, -0.06900766f, - 0.027239809f, 0.022582639f, -0.013296484f, -0.05459212f, - 0.08981f, -0.045407712f, 0.08682226f, -0.06867011f, - -0.14390695f, -0.02916037f, 0.000996957f, 0.091420636f, - 0.14283475f, -0.07390571f, -0.06402044f, 0.062524505f, - -0.093129106f, 0.04860203f, -0.08364217f, -0.08119002f, - 0.009352075f, 0.22920375f, 0.0016303885f, 0.11583097f, - -0.13732095f, 0.012405723f, -0.07551853f, 0.06343048f, - 0.12162708f, -0.031923793f, -0.014335606f, 0.01790974f, - -0.10650317f, -0.0724401f, 0.08554849f, -0.05727212f, - 0.06556731f, -0.042729504f, -0.043227166f, 0.011683251f, - -0.013082158f, -0.029302018f, -0.010899579f, -0.062036745f, - -0.022509435f, -0.00964907f, -0.01567329f, 0.04260106f, - -0.07787477f, -0.11576462f, 0.017356863f, 0.048673786f, - -0.017577527f, -0.05527947f, -0.082487635f, -0.040137455f, - -0.10820036f, -0.04666372f, 0.022746278f, -0.07851417f, - 0.01068115f, 0.032956902f, 0.022433773f, 0.0026891115f, - 0.08944216f, -0.0685835f, 0.010513544f, 0.07228705f, - 0.02032331f, -0.059686817f, -0.0005566496f, -0.086984694f, - 0.040414046f, -0.1380399f, 0.094208956f, -0.05722982f, - 0.012092817f, -0.04989123f, -0.086576f, -0.003399834f, - -0.04696032f, -0.045747425f, 0.10091314f, 0.048676282f, - -0.029037097f, 0.031399418f, -0.0040285117f, 0.047237843f, - 0.09504992f, 0.041799378f, -0.049185462f, -0.031518843f, - -0.10516937f, 0.026374253f, 0.10058866f, -0.0033195973f, - -0.041975245f, 0.0073591834f, 0.0033782164f, -0.004325073f, - -0.10167381f, 0.042500053f, -0.01447153f, 0.06464186f, - -0.017142897f, 0.03312627f, 0.009205989f, 0.024138335f, - -0.011337001f, 0.035530265f, -0.010912711f, 0.0706555f, - -0.005894094f, 0.051841937f, -0.1401738f, -0.02351249f, - 0.0365468f, 0.07590991f, 0.08838724f, 0.021681072f, - -0.10086113f, 0.019608743f, -0.06195883f, 0.077335775f, - 0.023646897f, -0.095322326f, 0.02233014f, 0.09756986f, - -0.048691444f, -0.009579111f, 0.07595467f, 0.11480546f, - -0.09801813f, 0.019894179f, 0.08502348f, 0.004032281f, - 0.037211012f, 0.068537936f, -0.048005626f, -0.091520436f, - -0.028379958f, -0.01556313f, 0.06554592f, -0.045599163f, - -0.01672207f, -0.020169014f, -0.011877351f, -0.20212261f, - 0.010889619f, 0.0047078193f, 0.038385306f, 0.08540671f, - 
-0.017140968f, -0.0035865551f, 0.016678626f, 0.005633034f, - 0.015963363f, 0.00871737f, 0.060130805f, 0.028611384f, - 0.10109069f, -0.015060172f, -0.07894427f, 0.06401885f, - 0.011584063f, -0.024466386f, 0.0047652307f, -0.09041358f, - 0.030737216f, -0.0046374933f, 0.14215417f, -0.11823516f, - 0.019899689f, 0.006106124f, -0.027092824f, 0.0786356f, - 0.05052217f, -0.058925f, -0.011402121f, -0.024987547f, - -0.0013661642f, -0.06832946f, -0.015667673f, -0.1083353f, - -0.00096863037f, -0.06988685f, -0.053350925f, -0.027275559f, - -0.033664223f, -0.07978348f, -0.025200296f, -0.017207067f, - -0.058403496f, -0.055697463f, 0.005798788f, 0.12965427f, - -0.062582195f, 0.0013350133f, -0.10482091f, 0.0379771f, - 0.072521195f, -0.0029455067f, -0.13797039f, -0.03628521f, - 0.013806405f, -0.017858358f, -0.01008298f, -0.07700066f, - -0.017081132f, 0.019358726f, 0.0027079724f, 0.004635139f, - 0.062634714f, -0.02338735f, -0.039547626f, -0.02050681f, - 0.03385117f, -0.083611414f, 0.002862572f, -0.09421313f, - 0.058618143f, -0.08598433f, 0.00972939f, 0.023867095f, - -0.053934585f, -0.023203006f, 0.07452513f, -0.048767887f, - -0.07314807f, -0.056307215f, -0.10433547f, -0.06440842f, - 0.04328182f, 0.04389765f, -0.020006588f, -0.09076438f, - -0.11652589f, -0.021705797f, 0.03345259f, -0.010329105f, - -0.025767034f, 0.013057034f, -0.07316461f, -0.10145612f, - 0.06358255f, 0.18531723f, 0.07759293f, 0.12006465f, - 0.1305557f, 0.058638252f, -0.03393652f, 0.09622831f, - -0.16253184f, -2.4580743e-06f, 0.079869635f, -0.070196845f, - -0.005644518f, 0.06857898f, -0.12598175f, -0.035084512f, - 0.03156317f, -0.12794146f, -0.031963028f, 0.04692781f, - 0.030070418f, 0.0071660685f, -0.095516115f, -0.004643372f, - 0.040170413f, -0.062104587f, -0.0037324072f, 0.0554317f, - 0.08184801f, -0.019164372f, 0.06791302f, 0.034257166f, - -0.10307039f, 0.021943003f, 0.046745934f, 0.0790918f, - -0.0265588f, -0.007824208f, 0.042546265f, -0.00977924f, - -0.0002440307f, -0.017384544f, -0.017990116f, 0.12252321f, - -0.014512694f, -0.08251313f, 0.08861942f, 0.13589665f, - 0.026351685f, 0.012641483f, 0.07466548f, 0.044301085f, - -0.045414884f, -0.051112458f, 0.03444247f, -0.08502782f, - -0.04106223f, -0.028126027f, 0.028473156f, 0.10467447f - }); - - auto recurrentToForgetWeights = - MakeTensor<float, 2>(tensorInfo20x16, {-0.057784554f, -0.026057621f, -0.068447545f, -0.022581743f, - 0.14811787f, 0.10826372f, 0.09471067f, 0.03987225f, - -0.0039523416f, 0.00030638507f, 0.053185795f, 0.10572994f, - 0.08414449f, -0.022036452f, -0.00066928595f, -0.09203576f, - 0.032950465f, -0.10985798f, -0.023809856f, 0.0021431844f, - -0.02196096f, -0.00326074f, 0.00058621005f, -0.074678116f, - -0.06193199f, 0.055729095f, 0.03736828f, 0.020123724f, - 0.061878487f, -0.04729229f, 0.034919553f, -0.07585433f, - -0.04421272f, -0.044019096f, 0.085488975f, 0.04058006f, - -0.06890133f, -0.030951202f, -0.024628663f, -0.07672815f, - 0.034293607f, 0.08556707f, -0.05293577f, -0.033561368f, - -0.04899627f, 0.0241671f, 0.015736353f, -0.095442444f, - -0.029564252f, 0.016493602f, -0.035026584f, 0.022337519f, - -0.026871363f, 0.004780428f, 0.0077918363f, -0.03601621f, - 0.016435321f, -0.03263031f, -0.09543275f, -0.047392778f, - 0.013454138f, 0.028934088f, 0.01685226f, -0.086110644f, - -0.046250615f, -0.01847454f, 0.047608484f, 0.07339695f, - 0.034546845f, -0.04881143f, 0.009128804f, -0.08802852f, - 0.03761666f, 0.008096139f, -0.014454086f, 0.014361001f, - -0.023502491f, -0.0011840804f, -0.07607001f, 0.001856849f, - -0.06509276f, -0.006021153f, -0.08570962f, -0.1451793f, - 
0.060212336f, 0.055259194f, 0.06974018f, 0.049454916f, - -0.027794661f, -0.08077226f, -0.016179763f, 0.1169753f, - 0.17213494f, -0.0056326236f, -0.053934924f, -0.0124349f, - -0.11520337f, 0.05409887f, 0.088759385f, 0.0019655675f, - 0.0042065294f, 0.03881498f, 0.019844765f, 0.041858196f, - -0.05695512f, 0.047233116f, 0.038937137f, -0.06542224f, - 0.014429736f, -0.09719407f, 0.13908425f, -0.05379757f, - 0.012321099f, 0.082840554f, -0.029899208f, 0.044217527f, - 0.059855383f, 0.07711018f, -0.045319796f, 0.0948846f, - -0.011724666f, -0.0033288454f, -0.033542685f, -0.04764985f, - -0.13873616f, 0.040668588f, 0.034832682f, -0.015319203f, - -0.018715994f, 0.046002675f, 0.0599172f, -0.043107376f, - 0.0294216f, -0.002314414f, -0.022424703f, 0.0030315618f, - 0.0014641669f, 0.0029166266f, -0.11878115f, 0.013738511f, - 0.12375372f, -0.0006038222f, 0.029104086f, 0.087442465f, - 0.052958444f, 0.07558703f, 0.04817258f, 0.044462286f, - -0.015213451f, -0.08783778f, -0.0561384f, -0.003008196f, - 0.047060397f, -0.002058388f, 0.03429439f, -0.018839769f, - 0.024734668f, 0.024614193f, -0.042046934f, 0.09597743f, - -0.0043254104f, 0.04320769f, 0.0064070094f, -0.0019131786f, - -0.02558259f, -0.022822596f, -0.023273505f, -0.02464396f, - -0.10991725f, -0.006240552f, 0.0074488563f, 0.024044557f, - 0.04383914f, -0.046476185f, 0.028658995f, 0.060410924f, - 0.050786525f, 0.009452605f, -0.0073054377f, -0.024810238f, - 0.0052906186f, 0.0066939713f, -0.0020913032f, 0.014515517f, - 0.015898481f, 0.021362653f, -0.030262267f, 0.016587038f, - -0.011442813f, 0.041154444f, -0.007631438f, -0.03423484f, - -0.010977775f, 0.036152758f, 0.0066366293f, 0.11915515f, - 0.02318443f, -0.041350313f, 0.021485701f, -0.10906167f, - -0.028218046f, -0.00954771f, 0.020531068f, -0.11995105f, - -0.03672871f, 0.024019798f, 0.014255957f, -0.05221243f, - -0.00661567f, -0.04630967f, 0.033188973f, 0.10107534f, - -0.014027541f, 0.030796422f, -0.10270911f, -0.035999842f, - 0.15443139f, 0.07684145f, 0.036571592f, -0.035900835f, - -0.0034699554f, 0.06209149f, 0.015920248f, -0.031122351f, - -0.03858649f, 0.01849943f, 0.13872518f, 0.01503974f, - 0.069941424f, -0.06948533f, -0.0088794185f, 0.061282158f, - -0.047401894f, 0.03100163f, -0.041533746f, -0.10430945f, - 0.044574402f, -0.01425562f, -0.024290353f, 0.034563623f, - 0.05866852f, 0.023947537f, -0.09445152f, 0.035450947f, - 0.02247216f, -0.0042998926f, 0.061146557f, -0.10250651f, - 0.020881841f, -0.06747029f, 0.10062043f, -0.0023941975f, - 0.03532124f, -0.016341697f, 0.09685456f, -0.016764693f, - 0.051808182f, 0.05875331f, -0.04536488f, 0.001626336f, - -0.028892258f, -0.01048663f, -0.009793449f, -0.017093895f, - 0.010987891f, 0.02357273f, -0.00010856845f, 0.0099760275f, - -0.001845119f, -0.03551521f, 0.0018358806f, 0.05763657f, - -0.01769146f, 0.040995963f, 0.02235177f, -0.060430344f, - 0.11475477f, -0.023854522f, 0.10071741f, 0.0686208f, - -0.014250481f, 0.034261297f, 0.047418304f, 0.08562733f, - -0.030519066f, 0.0060542435f, 0.014653856f, -0.038836084f, - 0.04096551f, 0.032249358f, -0.08355519f, -0.026823482f, - 0.056386515f, -0.010401743f, -0.028396193f, 0.08507674f, - 0.014410365f, 0.020995233f, 0.17040324f, 0.11511526f, - 0.02459721f, 0.0066619175f, 0.025853224f, -0.023133837f, - -0.081302024f, 0.017264642f, -0.009585969f, 0.09491168f, - -0.051313367f, 0.054532815f, -0.014298593f, 0.10657464f, - 0.007076659f, 0.10964551f, 0.0409152f, 0.008275321f, - -0.07283536f, 0.07937492f, 0.04192024f, -0.1075027f - }); - - auto recurrentToCellWeights = - MakeTensor<float, 2>(tensorInfo20x16, {-0.037322544f, 
0.018592842f, 0.0056175636f, -0.06253426f, - 0.055647098f, -0.05713207f, -0.05626563f, 0.005559383f, - 0.03375411f, -0.025757805f, -0.088049285f, 0.06017052f, - -0.06570978f, 0.007384076f, 0.035123326f, -0.07920549f, - 0.053676967f, 0.044480428f, -0.07663568f, 0.0071805613f, - 0.08089997f, 0.05143358f, 0.038261272f, 0.03339287f, - -0.027673481f, 0.044746667f, 0.028349208f, 0.020090483f, - -0.019443132f, -0.030755889f, -0.0040000007f, 0.04465846f, - -0.021585021f, 0.0031670958f, 0.0053199246f, -0.056117613f, - -0.10893326f, 0.076739706f, -0.08509834f, -0.027997585f, - 0.037871376f, 0.01449768f, -0.09002357f, -0.06111149f, - -0.046195522f, 0.0422062f, -0.005683705f, -0.1253618f, - -0.012925729f, -0.04890792f, 0.06985068f, 0.037654128f, - 0.03398274f, -0.004781977f, 0.007032333f, -0.031787455f, - 0.010868644f, -0.031489216f, 0.09525667f, 0.013939797f, - 0.0058680447f, 0.0167067f, 0.02668468f, -0.04797466f, - -0.048885044f, -0.12722108f, 0.035304096f, 0.06554885f, - 0.00972396f, -0.039238118f, -0.05159735f, -0.11329045f, - 0.1613692f, -0.03750952f, 0.06529313f, -0.071974665f, - -0.11769596f, 0.015524369f, -0.0013754242f, -0.12446318f, - 0.02786344f, -0.014179351f, 0.005264273f, 0.14376344f, - 0.015983658f, 0.03406988f, -0.06939408f, 0.040699873f, - 0.02111075f, 0.09669095f, 0.041345075f, -0.08316494f, - -0.07684199f, -0.045768797f, 0.032298047f, -0.041805092f, - 0.0119405f, 0.0061010392f, 0.12652606f, 0.0064572375f, - -0.024950314f, 0.11574242f, 0.04508852f, -0.04335324f, - 0.06760663f, -0.027437469f, 0.07216407f, 0.06977076f, - -0.05438599f, 0.034033038f, -0.028602652f, 0.05346137f, - 0.043184172f, -0.037189785f, 0.10420091f, 0.00882477f, - -0.054019816f, -0.074273005f, -0.030617684f, -0.0028467078f, - 0.024302477f, -0.0038869337f, 0.005332455f, 0.0013399826f, - 0.04361412f, -0.007001822f, 0.09631092f, -0.06702025f, - -0.042049985f, -0.035070654f, -0.04103342f, -0.10273396f, - 0.0544271f, 0.037184782f, -0.13150354f, -0.0058036847f, - -0.008264958f, 0.042035464f, 0.05891794f, 0.029673764f, - 0.0063542654f, 0.044788733f, 0.054816857f, 0.062257513f, - -0.00093483756f, 0.048938446f, -0.004952862f, -0.007730018f, - -0.04043371f, -0.017094059f, 0.07229206f, -0.023670016f, - -0.052195564f, -0.025616996f, -0.01520939f, 0.045104615f, - -0.007376126f, 0.003533447f, 0.006570588f, 0.056037236f, - 0.12436656f, 0.051817212f, 0.028532185f, -0.08686856f, - 0.11868599f, 0.07663395f, -0.07323171f, 0.03463402f, - -0.050708205f, -0.04458982f, -0.11590894f, 0.021273347f, - 0.1251325f, -0.15313013f, -0.12224372f, 0.17228661f, - 0.023029093f, 0.086124025f, 0.006445803f, -0.03496501f, - 0.028332196f, 0.04449512f, -0.042436164f, -0.026587414f, - -0.006041347f, -0.09292539f, -0.05678812f, 0.03897832f, - 0.09465633f, 0.008115513f, -0.02171956f, 0.08304309f, - 0.071401566f, 0.019622514f, 0.032163795f, -0.004167056f, - 0.02295182f, 0.030739572f, 0.056506045f, 0.004612461f, - 0.06524936f, 0.059999723f, 0.046395954f, -0.0045512207f, - -0.1335546f, -0.030136576f, 0.11584653f, -0.014678886f, - 0.0020118146f, -0.09688814f, -0.0790206f, 0.039770417f, - -0.0329582f, 0.07922767f, 0.029322514f, 0.026405897f, - 0.04207835f, -0.07073373f, 0.063781224f, 0.0859677f, - -0.10925287f, -0.07011058f, 0.048005477f, 0.03438226f, - -0.09606514f, -0.006669445f, -0.043381985f, 0.04240257f, - -0.06955775f, -0.06769346f, 0.043903265f, -0.026784198f, - -0.017840602f, 0.024307009f, -0.040079936f, -0.019946516f, - 0.045318738f, -0.12233574f, 0.026170589f, 0.0074471775f, - 0.15978073f, 0.10185836f, 0.10298046f, -0.015476589f, - -0.039390966f, 
-0.072174534f, 0.0739445f, -0.1211869f, - -0.0347889f, -0.07943156f, 0.014809798f, -0.12412325f, - -0.0030663363f, 0.039695457f, 0.0647603f, -0.08291318f, - -0.018529687f, -0.004423833f, 0.0037507233f, 0.084633216f, - -0.01514876f, -0.056505352f, -0.012800942f, -0.06994386f, - 0.012962922f, -0.031234352f, 0.07029052f, 0.016418684f, - 0.03618972f, 0.055686004f, -0.08663945f, -0.017404709f, - -0.054761406f, 0.029065743f, 0.052404847f, 0.020238016f, - 0.0048197987f, -0.0214882f, 0.07078733f, 0.013016777f, - 0.06262858f, 0.009184685f, 0.020785125f, -0.043904778f, - -0.0270329f, -0.03299152f, -0.060088247f, -0.015162964f, - -0.001828936f, 0.12642565f, -0.056757294f, 0.013586685f, - 0.09232601f, -0.035886683f, 0.06000002f, 0.05229691f, - -0.052580316f, -0.082029596f, -0.010794592f, 0.012947712f, - -0.036429964f, -0.085508935f, -0.13127148f, -0.017744139f, - 0.031502828f, 0.036232427f, -0.031581745f, 0.023051167f, - -0.05325106f, -0.03421577f, 0.028793324f, -0.034633752f, - -0.009881397f, -0.043551125f, -0.018609839f, 0.0019097115f, - -0.008799762f, 0.056595087f, 0.0022273948f, 0.055752404f - }); - - auto recurrentToOutputWeights = - MakeTensor<float, 2>(tensorInfo20x16, {0.025825322f, -0.05813119f, 0.09495884f,-0.045984812f, -0.01255415f, - -0.0026479573f,-0.08196161f,-0.054914974f,-0.0046604523f, - -0.029587349f, -0.044576716f, -0.07480124f, -0.082868785f, - 0.023254942f, 0.027502948f, -0.0039728214f, -0.08683098f, - -0.08116779f, -0.014675607f, -0.037924774f, -0.023314456f, - -0.007401714f, -0.09255757f, 0.029460307f, -0.08829125f, - -0.005139627f, -0.08989442f, -0.0555066f, 0.13596267f, - -0.025062224f, -0.048351806f, -0.03850004f, 0.07266485f, - -0.022414139f, 0.05940088f, 0.075114764f, 0.09597592f, - -0.010211725f, -0.0049794707f, -0.011523867f, -0.025980417f, - 0.072999895f, 0.11091378f, -0.081685916f, 0.014416728f, - 0.043229222f, 0.034178585f, -0.07530371f, 0.035837382f, - -0.085607f, -0.007721233f, -0.03287832f, -0.043848954f, - -0.06404588f, -0.06632928f, -0.073643476f, 0.008214239f, - -0.045984086f, 0.039764922f, 0.03474462f, 0.060612556f, - -0.080590084f, 0.049127717f, 0.04151091f, -0.030063879f, - 0.008801774f, -0.023021035f, -0.019558564f, 0.05158114f, - -0.010947698f, -0.011825728f, 0.0075720972f, 0.0699727f, - -0.0039981045f, 0.069350146f, 0.08799282f, 0.016156472f, - 0.035502106f, 0.11695009f, 0.006217345f, 0.13392477f, - -0.037875112f, 0.025745004f, 0.08940699f, -0.00924166f, - 0.0046702605f, -0.036598757f, -0.08811812f, 0.10522024f, - -0.032441203f, 0.008176899f, -0.04454919f, 0.07058152f, - 0.0067963637f, 0.039206743f, 0.03259838f, 0.03725492f, - -0.09515802f, 0.013326398f, -0.052055415f, -0.025676316f, - 0.03198509f, -0.015951829f, -0.058556724f, 0.036879618f, - 0.043357447f, 0.028362012f, -0.05908629f, 0.0059240665f, - -0.04995891f, -0.019187413f,0.0276265f, -0.01628143f, 0.0025863599f, - 0.08800015f, 0.035250366f, -0.022165963f, -0.07328642f, - -0.009415526f, -0.07455109f, 0.11690406f, 0.0363299f, - 0.07411125f, 0.042103454f, -0.009660886f, 0.019076364f, - 0.018299393f, -0.046004917f, 0.08891175f,0.0431396f, -0.026327137f, - -0.051502608f, 0.08979574f, -0.051670972f, 0.04940282f, - -0.07491107f, -0.021240504f, 0.022596184f, -0.034280192f, - 0.060163025f, -0.058211457f, -0.051837247f, -0.01349775f, - -0.04639988f, -0.035936575f, -0.011681591f, 0.064818054f, - 0.0073146066f, -0.021745546f, -0.043124277f, -0.06471268f, - -0.07053354f, -0.029321948f, -0.05330136f, 0.016933719f, - -0.053782392f, 0.13747959f, -0.1361751f, -0.11569455f, - 0.0033329215f, 0.05693899f, 
-0.053219706f, 0.063698f, - 0.07977434f, -0.07924483f, 0.06936997f, 0.0034815092f, - -0.007305279f, -0.037325785f, -0.07251102f, -0.033633437f, - -0.08677009f, 0.091591336f, -0.14165086f, 0.021752775f, - 0.019683983f, 0.0011612234f, -0.058154266f, 0.049996935f, - 0.0288841f, -0.0024567875f, -0.14345716f, 0.010955264f,-0.10234828f, - 0.1183656f, -0.0010731248f, -0.023590032f,-0.072285876f,-0.0724771f, - -0.026382286f, -0.0014920527f, 0.042667855f, 0.0018776858f, - 0.02986552f, 0.009814309f, 0.0733756f, 0.12289186f, - 0.018043943f, -0.0458958f, 0.049412545f, 0.033632483f, - 0.05495232f, 0.036686596f, -0.013781798f, -0.010036754f, - 0.02576849f, -0.08307328f, 0.010112348f, 0.042521734f, - -0.05869831f, -0.071689695f, 0.03876447f, -0.13275425f, -0.0352966f, - -0.023077697f, 0.10285965f, 0.084736146f, 0.15568255f, - -0.00040734606f, 0.027835453f, -0.10292561f, -0.032401145f, - 0.10053256f, -0.026142767f, -0.08271222f, -0.0030240538f, - -0.016368777f, 0.1070414f, 0.042672627f, 0.013456989f, - -0.0437609f, -0.022309763f, 0.11576483f, 0.04108048f, - 0.061026827f, -0.0190714f, -0.0869359f, 0.037901703f, 0.0610107f, - 0.07202949f, 0.01675338f, 0.086139716f, -0.08795751f, - -0.014898893f, -0.023771819f, -0.01965048f, 0.007955471f, - -0.043740474f, 0.03346837f, -0.10549954f, 0.090567775f, - 0.042013682f, -0.03176985f, 0.12569028f, -0.02421228f, - -0.029526481f, 0.023851605f, 0.031539805f, 0.05292009f, - -0.02344001f, -0.07811758f, -0.08834428f, 0.10094801f, - 0.16594367f, -0.06861939f, -0.021256343f, -0.041093912f, - -0.06669611f, 0.035498552f, 0.021757556f, -0.09302526f, - -0.015403468f, -0.06614931f, -0.051798206f, -0.013874718f, - 0.03630673f, 0.010412845f, -0.08077351f, 0.046185967f, - 0.0035662893f, 0.03541868f, -0.094149634f, -0.034814864f, - 0.003128424f, -0.020674974f, -0.03944324f, -0.008110165f, - -0.11113267f, 0.08484226f, 0.043586485f, 0.040582247f, - 0.0968012f, -0.065249965f, -0.028036479f, 0.0050708856f, - 0.0017462453f, 0.0326779f, 0.041296225f, 0.09164146f, - -0.047743853f, -0.015952192f, -0.034451712f, 0.084197424f, - -0.05347844f, -0.11768019f, 0.085926116f, -0.08251791f, - -0.045081906f, 0.0948852f, 0.068401024f, 0.024856757f, - 0.06978981f, -0.057309967f, -0.012775832f, -0.0032452994f, - 0.01977615f, -0.041040014f, -0.024264973f,0.063464895f, 0.05431621f - }); - - auto cellToInputWeights = - MakeTensor<float, 1>(tensorInfo20, {0.040369894f, 0.030746894f, 0.24704495f, 0.018586371f, -0.037586458f, - -0.15312155f, -0.11812848f, -0.11465643f, 0.20259799f, 0.11418174f, - -0.10116027f, -0.011334949f, 0.12411352f, -0.076769054f,-0.052169047f, - 0.21198851f, -0.38871562f, -0.09061183f, -0.09683246f, -0.21929175f - }); - - - auto cellToForgetWeights = - MakeTensor<float, 1>(tensorInfo20, {-0.01998659f,-0.15568835f,-0.24248174f, -0.012770197f, 0.041331276f, - -0.072311886f, -0.052123554f,-0.0066330447f,-0.043891653f,0.036225766f, - -0.047248036f, 0.021479502f,0.033189066f, 0.11952997f, -0.020432774f, - 0.64658105f, -0.06650122f, -0.03467612f, 0.095340036f, 0.23647355f - }); - - auto cellToOutputWeights = - MakeTensor<float, 1>(tensorInfo20, {0.08286371f, -0.08261836f, -0.51210177f, 0.002913762f, 0.17764764f, - -0.5495371f, -0.08460716f, -0.24552552f, 0.030037103f, 0.04123544f, - -0.11940523f, 0.007358328f, 0.1890978f, 0.4833202f, -0.34441817f, - 0.36312827f, -0.26375428f, 0.1457655f, -0.19724406f, 0.15548733f - }); - - auto projectionWeights = - MakeTensor<float, 2>(tensorInfo16x20, - {-0.009802181f, 0.09401916f, 0.0717386f, -0.13895074f, 0.09641832f, - 0.060420845f, 0.08539281f, 
0.054285463f, 0.061395317f, 0.034448683f, - -0.042991187f, 0.019801661f, -0.16840284f, -0.015726732f, -0.23041931f, - -0.024478018f, -0.10959692f, -0.013875541f, 0.18600968f, -0.061274476f, - 0.0138165f, -0.08160894f, -0.07661644f, 0.032372914f, 0.16169067f, - 0.22465782f, -0.03993472f, -0.004017731f, 0.08633481f, -0.28869787f, - 0.08682067f, 0.17240396f, 0.014975425f, 0.056431185f, 0.031037588f, - 0.16702051f, 0.0077946745f, 0.15140012f, 0.29405436f, 0.120285f, - -0.188994f, -0.027265169f, 0.043389652f, -0.022061434f, 0.014777949f, - -0.20203483f, 0.094781205f, 0.19100232f, 0.13987629f, -0.036132768f, - -0.06426278f, -0.05108664f, 0.13221376f, 0.009441198f, -0.16715929f, - 0.15859416f, -0.040437475f, 0.050779544f, -0.022187516f, 0.012166504f, - 0.027685808f, -0.07675938f, -0.0055694645f, -0.09444123f, 0.0046453946f, - 0.050794356f, 0.10770313f, -0.20790008f, -0.07149004f, -0.11425117f, - 0.008225835f, -0.035802525f, 0.14374903f, 0.15262283f, 0.048710253f, - 0.1847461f, -0.007487823f, 0.11000021f, -0.09542012f, 0.22619456f, - -0.029149994f, 0.08527916f, 0.009043713f, 0.0042746216f, 0.016261552f, - 0.022461696f, 0.12689082f, -0.043589946f, -0.12035478f, -0.08361797f, - -0.050666027f, -0.1248618f, -0.1275799f, -0.071875185f, 0.07377272f, - 0.09944291f, -0.18897448f, -0.1593054f, -0.06526116f, -0.040107165f, - -0.004618631f, -0.067624845f, -0.007576253f, 0.10727444f, 0.041546922f, - -0.20424393f, 0.06907816f, 0.050412357f, 0.00724631f, 0.039827548f, - 0.12449835f, 0.10747581f, 0.13708383f, 0.09134148f, -0.12617786f, - -0.06428341f, 0.09956831f, 0.1208086f, -0.14676677f, -0.0727722f, - 0.1126304f, 0.010139365f, 0.015571211f, -0.038128063f, 0.022913318f, - -0.042050496f, 0.16842307f, -0.060597885f, 0.10531834f, -0.06411776f, - -0.07451711f, -0.03410368f, -0.13393489f, 0.06534304f, 0.003620307f, - 0.04490757f, 0.05970546f, 0.05197996f, 0.02839995f, 0.10434969f, - -0.013699693f, -0.028353551f, -0.07260381f, 0.047201227f, -0.024575593f, - -0.036445823f, 0.07155557f, 0.009672501f, -0.02328883f, 0.009533515f, - -0.03606021f, -0.07421458f, -0.028082801f, -0.2678904f, -0.13221288f, - 0.18419984f, -0.13012612f, -0.014588381f, -0.035059117f, -0.04824723f, - 0.07830115f, -0.056184657f, 0.03277091f, 0.025466874f, 0.14494097f, - -0.12522776f, -0.098633975f, -0.10766018f, -0.08317623f, 0.08594209f, - 0.07749552f, 0.039474737f, 0.1776665f, -0.07409566f, -0.0477268f, - 0.29323658f, 0.10801441f, 0.1154011f, 0.013952499f, 0.10739139f, - 0.10708251f, -0.051456142f, 0.0074137426f, -0.10430189f, 0.10034707f, - 0.045594677f, 0.0635285f, -0.0715442f, -0.089667566f, -0.10811871f, - 0.00026344223f, 0.08298446f, -0.009525053f, 0.006585689f, -0.24567553f, - -0.09450807f, 0.09648481f, 0.026996298f, -0.06419476f, -0.04752702f, - -0.11063944f, -0.23441927f, -0.17608605f, -0.052156363f, 0.067035615f, - 0.19271925f, -0.0032889997f, -0.043264326f, 0.09663576f, -0.057112187f, - -0.10100678f, 0.0628376f, 0.04447668f, 0.017961001f, -0.10094388f, - -0.10190601f, 0.18335468f, 0.10494553f, -0.052095775f, -0.0026118709f, - 0.10539724f, -0.04383912f, -0.042349473f, 0.08438151f, -0.1947263f, - 0.02251204f, 0.11216432f, -0.10307853f, 0.17351969f, -0.039091777f, - 0.08066188f, -0.00561982f, 0.12633002f, 0.11335965f, -0.0088127935f, - -0.019777594f, 0.06864014f, -0.059751723f, 0.016233567f, -0.06894641f, - -0.28651384f, -0.004228674f, 0.019708522f, -0.16305895f, -0.07468996f, - -0.0855457f, 0.099339016f, -0.07580735f, -0.13775392f, 0.08434318f, - 0.08330512f, -0.12131499f, 0.031935584f, 0.09180414f, -0.08876437f, - -0.08049874f, 
0.008753825f, 0.03498998f, 0.030215185f, 0.03907079f, - 0.089751154f, 0.029194152f, -0.03337423f, -0.019092513f, 0.04331237f, - 0.04299654f, -0.036394123f, -0.12915532f, 0.09793732f, 0.07512415f, - -0.11319543f, -0.032502122f, 0.15661901f, 0.07671967f, -0.005491124f, - -0.19379048f, -0.218606f, 0.21448623f, 0.017840758f, 0.1416943f, - -0.07051762f, 0.19488361f, 0.02664691f, -0.18104725f, -0.09334311f, - 0.15026465f, -0.15493552f, -0.057762887f, -0.11604192f, -0.262013f, - -0.01391798f, 0.012185008f, 0.11156489f, -0.07483202f, 0.06693364f, - -0.26151478f, 0.046425626f, 0.036540434f, -0.16435726f, 0.17338543f, - -0.21401681f, -0.11385144f, -0.08283257f, -0.069031075f, 0.030635102f, - 0.010969227f, 0.11109743f, 0.010919218f, 0.027526086f, 0.13519906f, - 0.01891392f, -0.046839405f, -0.040167913f, 0.017953383f, -0.09700955f, - 0.0061885654f, -0.07000971f, 0.026893595f, -0.038844477f, 0.14543656f - }); - - std::vector<float> projectionBiasVector(outputSize, 0.f); - auto projectionBias = MakeTensor<float,1>(tensorInfo16, projectionBiasVector); - - armnn::ScopedCpuTensorHandle inputToInputWeightsTensor(tensorInfo20x5); - armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(tensorInfo20x5); - armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(tensorInfo20x5); - armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(tensorInfo20x5); - armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(tensorInfo20x16); - armnn::ScopedCpuTensorHandle recurrentToInputWeightsTensor(tensorInfo20x16); - armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(tensorInfo20x16); - armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(tensorInfo20x16); - armnn::ScopedCpuTensorHandle cellToInputWeightsTensor(tensorInfo20); - armnn::ScopedCpuTensorHandle inputGateBiasTensor(tensorInfo20); - armnn::ScopedCpuTensorHandle forgetGateBiasTensor(tensorInfo20); - armnn::ScopedCpuTensorHandle cellBiasTensor(tensorInfo20); - armnn::ScopedCpuTensorHandle outputGateBiasTensor(tensorInfo20); - armnn::ScopedCpuTensorHandle cellToForgetWeightsTensor(tensorInfo20); - armnn::ScopedCpuTensorHandle cellToOutputWeightsTensor(tensorInfo20); - armnn::ScopedCpuTensorHandle projectionWeightsTensor(tensorInfo16x20); - armnn::ScopedCpuTensorHandle projectionBiasTensor(tensorInfo16); - - AllocateAndCopyDataToITensorHandle(&inputToInputWeightsTensor, &inputToInputWeights[0][0]); - AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]); - AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]); - AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]); - AllocateAndCopyDataToITensorHandle(&recurrentToInputWeightsTensor, &recurrentToInputWeights[0][0]); - AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]); - AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]); - AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]); - AllocateAndCopyDataToITensorHandle(&cellToInputWeightsTensor, &cellToInputWeights[0]); - AllocateAndCopyDataToITensorHandle(&inputGateBiasTensor, &inputGateBias[0]); - AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]); - AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]); - AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]); - AllocateAndCopyDataToITensorHandle(&cellToForgetWeightsTensor, 
&cellToForgetWeights[0]); - AllocateAndCopyDataToITensorHandle(&cellToOutputWeightsTensor, &cellToOutputWeights[0]); - AllocateAndCopyDataToITensorHandle(&projectionWeightsTensor, &projectionWeights[0][0]); - AllocateAndCopyDataToITensorHandle(&projectionBiasTensor, &projectionBias[0]); - - data.m_InputToInputWeights = &inputToInputWeightsTensor; - data.m_InputToForgetWeights = &inputToForgetWeightsTensor; - data.m_InputToCellWeights = &inputToCellWeightsTensor; - data.m_InputToOutputWeights = &inputToOutputWeightsTensor; - data.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor; - data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor; - data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor; - data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor; - data.m_CellToInputWeights = &cellToInputWeightsTensor; - data.m_InputGateBias = &inputGateBiasTensor; - data.m_ForgetGateBias = &forgetGateBiasTensor; - data.m_CellBias = &cellBiasTensor; - data.m_OutputGateBias = &outputGateBiasTensor; - data.m_CellToForgetWeights = &cellToForgetWeightsTensor; - data.m_CellToOutputWeights = &cellToOutputWeightsTensor; - data.m_ProjectionWeights = &projectionWeightsTensor; - data.m_ProjectionBias = &projectionBiasTensor; - - // Flags to set test configuration - data.m_Parameters.m_ActivationFunc = 4; - data.m_Parameters.m_CifgEnabled = false; - data.m_Parameters.m_PeepholeEnabled = true; - data.m_Parameters.m_ProjectionEnabled = true; - - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateLstm(data, info); - inputHandle->Allocate(); - outputStateInHandle->Allocate(); - cellStateInHandle->Allocate(); - - scratchHandle->Allocate(); - outputStateOutHandle->Allocate(); - cellStateOutHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]); - CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]); - CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); - - return ret; - -} - - -LayerTestResult<float, 2> LstmLayerWithCifgWithPeepholeNoProjectionTestImpl(armnn::IWorkloadFactory& workloadFactory, - const boost::multi_array<float, 2>& input, - const boost::multi_array<float, 2>& outputExpected) -{ - bool cifgEnabled = true; - bool peepholeEnabled = true; - bool projectionEnabled = false; - // These are not the input and the output of Lstm yet - unsigned int batchSize = boost::numeric_cast<unsigned int>(input.shape()[0]); - unsigned int inputSize = boost::numeric_cast<unsigned int>(input.shape()[1]); - - unsigned int outputSize = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]); - - const unsigned int cellSize = outputSize; - - // Decide the shape of all input tensors - armnn::TensorInfo inputTensorInfo({batchSize , inputSize}, armnn::GetDataType<float>()); - armnn::TensorInfo outputStateInTensorInfo({batchSize, outputSize}, armnn::GetDataType<float>()); - armnn::TensorInfo cellStateInTensorInfo({batchSize, cellSize}, armnn::GetDataType<float>()); - - unsigned int scratchBufferSize = cifgEnabled ? 
cellSize * 4 : cellSize * 3; - armnn::TensorInfo scratchBufferTensorInfo({batchSize, scratchBufferSize}, armnn::GetDataType<float>()); - armnn::TensorInfo outputStateOutTensorInfo({batchSize, outputSize}, armnn::GetDataType<float>()); - armnn::TensorInfo cellStateOutTensorInfo({batchSize, cellSize}, armnn::GetDataType<float>()); - armnn::TensorInfo outputTensorInfo({batchSize, outputSize}, armnn::GetDataType<float>()); - - // List of inputs - std::vector<float> inputData; - inputData.assign(input.data(), input.data() + batchSize*inputSize); - auto inputTensor = MakeTensor<float,2>(inputTensorInfo, inputData); - - std::vector<float> outputStateInVector(batchSize * outputSize, 0.f); - auto outputStateInTensor = MakeTensor<float, 2>(outputStateInTensorInfo, outputStateInVector); - - std::vector<float> cellStateInVector(batchSize * cellSize, 0.f); - auto cellStateInTensor = MakeTensor<float, 2>(cellStateInTensorInfo, cellStateInVector); - - - // Prepare all the weights in the descriptor for LSTM - armnn::LstmQueueDescriptor data; - armnn::TensorInfo tensorInfoInput({cellSize, inputSize}, armnn::GetDataType<float>()); - armnn::TensorInfo tensorInfoOutput({cellSize, outputSize}, armnn::GetDataType<float>()); - armnn::TensorInfo tensorInfoNumUnits({cellSize}, armnn::GetDataType<float>()); - - auto inputToCellWeights = MakeTensor<float, 2>(tensorInfoInput, - {-0.49770179f, -0.27711356f, -0.09624726f, 0.05100781f, - 0.04717243f, 0.48944736f, -0.38535351f, - -0.17212132f}); - auto inputToForgetWeights = MakeTensor<float, 2>(tensorInfoInput, - {-0.55291498f, -0.42866567f, 0.13056988f, - -0.3633365f, -0.22755712f, 0.28253698f, 0.24407166f, - 0.33826375f}); - auto inputToOutputWeights = MakeTensor<float, 2>(tensorInfoInput, - {0.10725588f, -0.02335852f, -0.55932593f, - -0.09426838f, -0.44257352f, 0.54939759f, - 0.01533556f, 0.42751634f}); - auto cellBias = MakeTensor<float, 1>(tensorInfoNumUnits, {0.f, 0.f, 0.f, 0.f}); - auto forgetGateBias = MakeTensor<float, 1>(tensorInfoNumUnits, {1.f, 1.f, 1.f, 1.f}); - auto outputGateBias = MakeTensor<float, 1>(tensorInfoNumUnits, {0.f, 0.f, 0.f, 0.f}); - - auto recurrentToCellWeights = MakeTensor<float, 2>(tensorInfoOutput, - {0.54066205f, -0.32668582f, -0.43562764f, -0.56094903f, 0.42957711f, - 0.01841056f, -0.32764608f, -0.33027974f, -0.10826075f, 0.20675004f, - 0.19069612f, -0.03026325f, -0.54532051f, 0.33003211f, 0.44901288f, - 0.21193194f}); - auto recurrentToForgetWeights = MakeTensor<float, 2>(tensorInfoOutput, - {-0.13832897f, -0.0515101f, -0.2359007f, -0.16661474f, -0.14340827f, - 0.36986142f, 0.23414481f, 0.55899f, 0.10798943f, -0.41174671f, 0.17751795f, - -0.34484994f, -0.35874045f, -0.11352962f, 0.27268326f, 0.54058349f}); - - auto recurrentToOutputWeights = MakeTensor<float, 2>(tensorInfoOutput, - {0.41613156f, 0.42610586f, -0.16495961f, -0.5663873f, 0.30579174f, -0.05115908f, - -0.33941799f, 0.23364776f, 0.11178309f, 0.09481031f, -0.26424935f, 0.46261835f, - 0.50248802f, 0.26114327f, -0.43736315f, 0.33149987f}); - - auto cellToForgetWeights = MakeTensor<float, 1>(tensorInfoNumUnits, - {0.47485286f, -0.51955009f, -0.24458408f, 0.31544167f}); - auto cellToOutputWeights = MakeTensor<float, 1>(tensorInfoNumUnits, - {-0.17135078f, 0.82760304f, 0.85573703f, -0.77109635f}); - - armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(tensorInfoInput); - armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(tensorInfoInput); - armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(tensorInfoInput); - - armnn::ScopedCpuTensorHandle 
cellBiasTensor(tensorInfoNumUnits); - armnn::ScopedCpuTensorHandle forgetGateBiasTensor(tensorInfoNumUnits); - armnn::ScopedCpuTensorHandle outputGateBiasTensor(tensorInfoNumUnits); - - armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(tensorInfoOutput); - armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(tensorInfoOutput); - armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(tensorInfoOutput); - - - armnn::ScopedCpuTensorHandle cellToForgetWeightsTensor(tensorInfoNumUnits); - armnn::ScopedCpuTensorHandle cellToOutputWeightsTensor(tensorInfoNumUnits); - - AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]); - AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]); - AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]); - - AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]); - AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]); - AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]); - - AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]); - AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]); - AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]); - - AllocateAndCopyDataToITensorHandle(&cellToForgetWeightsTensor, &cellToForgetWeights[0]); - AllocateAndCopyDataToITensorHandle(&cellToOutputWeightsTensor, &cellToOutputWeights[0]); - - - data.m_InputToCellWeights = &inputToCellWeightsTensor; - data.m_InputToForgetWeights = &inputToForgetWeightsTensor; - data.m_InputToOutputWeights = &inputToOutputWeightsTensor; - - data.m_CellBias = &cellBiasTensor; - data.m_ForgetGateBias = &forgetGateBiasTensor; - data.m_OutputGateBias = &outputGateBiasTensor; - - data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor; - data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor; - data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor; - - data.m_CellToForgetWeights = &cellToForgetWeightsTensor; - data.m_CellToOutputWeights = &cellToOutputWeightsTensor; - - // other parameters for the descriptor - data.m_Parameters.m_CifgEnabled = cifgEnabled; - data.m_Parameters.m_ProjectionEnabled = projectionEnabled; - data.m_Parameters.m_PeepholeEnabled = peepholeEnabled; - - data.m_Parameters.m_ActivationFunc = 4; - data.m_Parameters.m_ClippingThresProj = 0.0; - data.m_Parameters.m_ClippingThresCell = 0.0; - - - // List of outputs - std::vector<float> scratchBufferVector(batchSize * scratchBufferSize, 0.f); - auto scratchBufferTensor = MakeTensor<float,2>(scratchBufferTensorInfo, scratchBufferVector); - LayerTestResult<float, 2> ret0(scratchBufferTensorInfo); - - // Output state for a certain time step - std::vector<float> outputStateOutVector(batchSize * outputSize, 0.f); - auto outputStateOutTensor = MakeTensor<float,2>(outputStateOutTensorInfo, outputStateOutVector); - LayerTestResult<float, 2> ret1(outputStateOutTensorInfo); - - // Cell state for a certain time step - std::vector<float> cellStateOutVector(batchSize * cellSize, 0.f); - auto cellStateOutTensor = MakeTensor<float,2>(cellStateOutTensorInfo, cellStateOutVector); - LayerTestResult<float, 2> ret2(cellStateOutTensorInfo); - - // Output for a certain time step - std::vector<float> outputVector(batchSize * outputSize, 0.f); - auto outputTensor = MakeTensor<float, 2>(outputTensorInfo, 
outputVector); - std::vector<float> outputData; - outputData.assign(outputExpected.data(), outputExpected.data() + batchSize*outputSize); - LayerTestResult<float, 2> ret3(outputTensorInfo); - ret3.outputExpected = MakeTensor<float, 2>(outputTensorInfo, outputData); - - // Prepare the inputs and outputs for the workload - std::unique_ptr<armnn::ITensorHandle> inputHandle = - workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputStateInHandle = - workloadFactory.CreateTensorHandle(outputStateInTensorInfo); - std::unique_ptr<armnn::ITensorHandle> cellStateInHandle = - workloadFactory.CreateTensorHandle(cellStateInTensorInfo); - - std::unique_ptr<armnn::ITensorHandle> scratchBufferHandle = - workloadFactory.CreateTensorHandle(scratchBufferTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputStateOutHandle = - workloadFactory.CreateTensorHandle(outputStateOutTensorInfo); - std::unique_ptr<armnn::ITensorHandle> cellStateOutHandle = - workloadFactory.CreateTensorHandle(cellStateOutTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = - workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddInputToWorkload(data, info, outputStateInTensorInfo, outputStateInHandle.get()); - AddInputToWorkload(data, info, cellStateInTensorInfo, cellStateInHandle.get()); - - AddOutputToWorkload(data, info, scratchBufferTensorInfo, scratchBufferHandle.get()); - AddOutputToWorkload(data, info, outputStateOutTensorInfo, outputStateOutHandle.get()); - AddOutputToWorkload(data, info, cellStateOutTensorInfo, cellStateOutHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateLstm(data, info); - - - inputHandle->Allocate(); - outputStateInHandle->Allocate(); - cellStateInHandle->Allocate(); - - scratchBufferHandle->Allocate(); - outputStateOutHandle->Allocate(); - cellStateOutHandle->Allocate(); - outputHandle->Allocate(); - - - CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]); - CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]); - CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]); - - CopyDataToITensorHandle(scratchBufferHandle.get(), &scratchBufferTensor[0][0]); - CopyDataToITensorHandle(outputStateOutHandle.get(), &outputStateOutTensor[0][0]); - CopyDataToITensorHandle(cellStateOutHandle.get(), &cellStateOutTensor[0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&ret0.output[0][0], scratchBufferHandle.get()); - CopyDataFromITensorHandle(&ret1.output[0][0], outputStateOutHandle.get()); - CopyDataFromITensorHandle(&ret2.output[0][0], cellStateOutHandle.get()); - CopyDataFromITensorHandle(&ret3.output[0][0], outputHandle.get()); - - return ret3; -} diff --git a/src/armnn/backends/test/MemCopyTests.cpp b/src/armnn/backends/test/MemCopyTests.cpp deleted file mode 100644 index 44089c9d65..0000000000 --- a/src/armnn/backends/test/MemCopyTests.cpp +++ /dev/null @@ -1,180 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
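The scratch buffer declared at the start of this LSTM test is sized to either cellSize * 4 or cellSize * 3 columns. In the usual LSTM layout that factor counts one [batchSize, cellSize] slab per gate: four for a full LSTM, three when CIFG couples the input and forget gates. The condition of that ternary sits outside this excerpt, so the mapping below is the conventional one rather than a quote of the deleted file; the cell size of 4 is implied by the 4-element gate biases above.

#include <cstdio>

// Illustrative only: how the two possible scratch-buffer widths arise,
// assuming the conventional per-gate slab layout (not the deleted file's code).
int main()
{
    const unsigned int cellSize = 4;                       // matches the 4-element gate biases above
    std::printf("full LSTM scratch columns: %u\n", cellSize * 4); // 16
    std::printf("CIFG LSTM scratch columns: %u\n", cellSize * 3); // 12
    return 0;
}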
-// SPDX-License-Identifier: MIT -// -#include <boost/test/unit_test.hpp> -#include <boost/multi_array.hpp> - -#include "armnn/ArmNN.hpp" -#include "backends/RefWorkloadFactory.hpp" -#if ARMCOMPUTECL_ENABLED -#include "backends/ClWorkloadFactory.hpp" -#endif -#if ARMCOMPUTENEON_ENABLED -#include "backends/NeonWorkloadFactory.hpp" -#endif -#include "backends/CpuTensorHandle.hpp" -#include "test/TensorHelpers.hpp" - -#include "TensorCopyUtils.hpp" -#include "WorkloadTestUtils.hpp" - -#if ARMCOMPUTECL_ENABLED || ARMCOMPUTENEON_ENABLED -#include "../ArmComputeTensorUtils.hpp" -#endif - -BOOST_AUTO_TEST_SUITE(MemCopyTestSuite) - -void MemCopyTest(armnn::IWorkloadFactory& srcWorkloadFactory, armnn::IWorkloadFactory& dstWorkloadFactory, - bool withSubtensors) -{ - const std::array<unsigned int, 4> shapeData = { { 1u, 1u, 6u, 5u } }; - const armnn::TensorShape tensorShape(4, shapeData.data()); - const armnn::TensorInfo tensorInfo(tensorShape, armnn::DataType::Float32); - boost::multi_array<float, 4> inputData = MakeTensor<float, 4>(tensorInfo, std::vector<float>( - { - 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, - - 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, - - 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, - - 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, - - 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, - - 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, - }) - ); - - boost::multi_array<float, 4> outputData(shapeData); - - auto inputTensorHandle = srcWorkloadFactory.CreateTensorHandle(tensorInfo); - auto outputTensorHandle = dstWorkloadFactory.CreateTensorHandle(tensorInfo); - - AllocateAndCopyDataToITensorHandle(inputTensorHandle.get(), inputData.data()); - outputTensorHandle->Allocate(); - - armnn::MemCopyQueueDescriptor memCopyQueueDesc; - armnn::WorkloadInfo workloadInfo; - - const unsigned int origin[4] = {}; - - auto workloadInput = (withSubtensors && srcWorkloadFactory.SupportsSubTensors()) - ? srcWorkloadFactory.CreateSubTensorHandle(*inputTensorHandle, tensorShape, origin) - : std::move(inputTensorHandle); - auto workloadOutput = (withSubtensors && dstWorkloadFactory.SupportsSubTensors()) - ? 
dstWorkloadFactory.CreateSubTensorHandle(*outputTensorHandle, tensorShape, origin) - : std::move(outputTensorHandle); - - AddInputToWorkload(memCopyQueueDesc, workloadInfo, tensorInfo, workloadInput.get()); - AddOutputToWorkload(memCopyQueueDesc, workloadInfo, tensorInfo, workloadOutput.get()); - - dstWorkloadFactory.CreateMemCopy(memCopyQueueDesc, workloadInfo)->Execute(); - - CopyDataFromITensorHandle(outputData.data(), workloadOutput.get()); - - BOOST_TEST(CompareTensors(inputData, outputData)); -} - -template <typename SrcWorkloadFactory, typename DstWorkloadFactory> -void MemCopyTest(bool withSubtensors) -{ - SrcWorkloadFactory srcWorkloadFactory; - DstWorkloadFactory dstWorkloadFactory; - MemCopyTest(srcWorkloadFactory, dstWorkloadFactory, withSubtensors); -} - -#if ARMCOMPUTECL_ENABLED || ARMCOMPUTENEON_ENABLED - -BOOST_AUTO_TEST_CASE(AclTypeConversions) -{ - arm_compute::Strides strides(1,2,3,4); - armnn::TensorShape convertedStrides = armnn::armcomputetensorutils::GetStrides(strides); - BOOST_TEST(convertedStrides[0] == 4); - BOOST_TEST(convertedStrides[1] == 3); - BOOST_TEST(convertedStrides[2] == 2); - BOOST_TEST(convertedStrides[3] == 1); - - arm_compute::TensorShape shape(5,6,7,8); - armnn::TensorShape convertedshape = armnn::armcomputetensorutils::GetShape(shape); - BOOST_TEST(convertedshape[0] == 8); - BOOST_TEST(convertedshape[1] == 7); - BOOST_TEST(convertedshape[2] == 6); - BOOST_TEST(convertedshape[3] == 5); -} -#endif - -#if ARMCOMPUTECL_ENABLED - -BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpu) -{ - MemCopyTest<armnn::RefWorkloadFactory, armnn::ClWorkloadFactory>(false); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpu) -{ - MemCopyTest<armnn::ClWorkloadFactory, armnn::RefWorkloadFactory>(false); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpuWithSubtensors) -{ - MemCopyTest<armnn::RefWorkloadFactory, armnn::ClWorkloadFactory>(true); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpuWithSubtensors) -{ - MemCopyTest<armnn::ClWorkloadFactory, armnn::RefWorkloadFactory>(true); -} - -#endif // ARMCOMPUTECL_ENABLED - -#if ARMCOMPUTENEON_ENABLED - -BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndNeon) -{ - MemCopyTest<armnn::RefWorkloadFactory, armnn::NeonWorkloadFactory>(false); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndCpu) -{ - MemCopyTest<armnn::NeonWorkloadFactory, armnn::RefWorkloadFactory>(false); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndNeonWithSubtensors) -{ - MemCopyTest<armnn::RefWorkloadFactory, armnn::NeonWorkloadFactory>(true); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndCpuWithSubtensors) -{ - MemCopyTest<armnn::NeonWorkloadFactory, armnn::RefWorkloadFactory>(true); -} - -#endif // ARMCOMPUTENEON_ENABLED - -#if ARMCOMPUTECL_ENABLED && ARMCOMPUTENEON_ENABLED - -BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpu) -{ - MemCopyTest<armnn::NeonWorkloadFactory, armnn::ClWorkloadFactory>(false); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeon) -{ - MemCopyTest<armnn::ClWorkloadFactory, armnn::NeonWorkloadFactory>(false); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpuWithSubtensors) -{ - MemCopyTest<armnn::NeonWorkloadFactory, armnn::ClWorkloadFactory>(true); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeonWithSubtensors) -{ - MemCopyTest<armnn::ClWorkloadFactory, armnn::NeonWorkloadFactory>(true); -} - -#endif - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/NormTestImpl.hpp b/src/armnn/backends/test/NormTestImpl.hpp deleted file mode 100644 index 2690313655..0000000000 --- a/src/armnn/backends/test/NormTestImpl.hpp +++ /dev/null @@ -1,241 +0,0 @@ -// -// Copyright 
© 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "armnn/Exceptions.hpp" -#include "armnn/LayerSupport.hpp" - -#include "backends/CpuTensorHandle.hpp" -#include "backends/WorkloadFactory.hpp" - -LayerTestResult<float,4> SimpleNormalizationTestImpl(armnn::IWorkloadFactory& workloadFactory, - armnn::NormalizationAlgorithmChannel normChannel, - armnn::NormalizationAlgorithmMethod normMethod) -{ - const unsigned int inputHeight = 2; - const unsigned int inputWidth = 2; - const unsigned int inputChannels = 1; - const unsigned int inputNum = 2; - - unsigned int outputHeight = inputHeight; - unsigned int outputWidth = inputWidth; - unsigned int outputChannels = inputChannels; - unsigned int outputNum = inputNum; - - unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; - unsigned int outputShape[] = { outputNum, outputChannels, outputHeight, outputWidth }; - - auto inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); - auto outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); - - LayerTestResult<float,4> ret(outputTensorInfo); - - auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ - // Batch #0 - 1.0f, 2.0f, - 3.0f, 4.0f, - // Batch #1 - 5.0f, 6.0f, - 7.0f, 8.0f - })); - - float alpha = 1.f; - float beta = 1.f; - float kappa = 1.f; - uint32_t normSize = 3; - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::NormalizationQueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - data.m_Parameters.m_NormChannelType = normChannel; - data.m_Parameters.m_NormMethodType = normMethod; - data.m_Parameters.m_NormSize = normSize; - data.m_Parameters.m_Alpha = alpha; - data.m_Parameters.m_Beta = beta; - data.m_Parameters.m_K = kappa; - - armnn::PassthroughCpuTensorHandle refHandle(outputTensorInfo, &ret.outputExpected[0][0][0][0]); - armnn::NormalizationQueueDescriptor refData = data; - armnn::WorkloadInfo refInfo = info; - SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, &refHandle); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateNormalization(data, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - - switch (normMethod) - { - case armnn::NormalizationAlgorithmMethod::LocalBrightness: - { - switch (normChannel) - { - case armnn::NormalizationAlgorithmChannel::Within: - { - // When normalising within channels, the 3x3 kernel covers the entire 2x2 input at every index. - // Therefore, all output values should equal the inputs, but divided by: - // pow((kappa + (accumulatedScale * alpha)), beta) - // ...where accumulatedScale is the sum of every element squared. 
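The comment above pins down the within-channel expectation: each value is divided by pow(kappa + alpha * accumulatedScale, beta), where accumulatedScale sums the squares of the whole 2x2 channel. With the batch #0 data (1, 2, 3, 4) and alpha = beta = kappa = 1, the divisor works out as follows (standalone check, not part of the deleted test):

#include <cmath>
#include <cstdio>

int main()
{
    const float alpha = 1.f, beta = 1.f, kappa = 1.f;
    const float x[4] = { 1.f, 2.f, 3.f, 4.f };              // batch #0 of the test input
    float accumulatedScale = 0.f;
    for (float v : x) { accumulatedScale += v * v; }        // 1 + 4 + 9 + 16 = 30
    const float divisor = std::pow(kappa + accumulatedScale * alpha, beta); // 31
    std::printf("divisor = %.1f, first output = %.4f\n", divisor, x[0] / divisor); // ~0.0323
    return 0;
}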
- float divisor[inputNum]; - for(int i = 0; i < boost::numeric_cast<int>(inputNum); i++) - { - float accumulatedScale = input[i][0][0][0]*input[i][0][0][0] + - input[i][0][0][1]*input[i][0][0][1] + - input[i][0][1][0]*input[i][0][1][0] + - input[i][0][1][1]*input[i][0][1][1]; - divisor[i] = powf((kappa + accumulatedScale * alpha), beta); - } - ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, - std::vector<float>({input[0][0][0][0]/divisor[0], - input[0][0][0][1]/divisor[0], - input[0][0][1][0]/divisor[0], - input[0][0][1][1]/divisor[0], - input[1][0][0][0]/divisor[1], - input[1][0][0][1]/divisor[1], - input[1][0][1][0]/divisor[1], - input[1][0][1][1]/divisor[1]})); - break; - } - case armnn::NormalizationAlgorithmChannel::Across: - { - // When normalising across channels, all output values should equal the inputs, but multiplied by: - // pow((kappa + (accumulatedScale * alpha)), -beta) - // ...where accumulatedScale is the sum of the inputs for adjacent channels for this element squared - // ...where adjacent channels means within half the normSize for the channel - // The test data has only one channel, so this is simplified below. - std::vector<float> outputVector; - for (int n = 0; n < boost::numeric_cast<int>(inputNum); ++n) - { - for (int h = 0; h < boost::numeric_cast<int>(inputHeight); ++h) - { - for (int w = 0; w < boost::numeric_cast<int>(inputWidth); ++w) - { - float accumulatedScale = input[n][0][h][w]*input[n][0][h][w]; - float scale = powf((kappa + accumulatedScale * alpha), -beta); - outputVector.push_back(input[n][0][h][w] * scale); - } - } - } - ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outputVector); - break; - } - default: - { - throw armnn::UnimplementedException("Unsupported normalisation channel type, " - "only Across and Within are supported"); - } - } - break; - } - case armnn::NormalizationAlgorithmMethod::LocalContrast: // NOTE: intentional fallthrough. 
- default: - { - throw armnn::UnimplementedException("Unsupported normalisation method type, " - "only LocalBrightness is supported"); - } - } - - return ret; -} - -LayerTestResult<float,4> CompareNormalizationTestImpl(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory, - armnn::NormalizationAlgorithmChannel normChannel, - armnn::NormalizationAlgorithmMethod normMethod) -{ - constexpr unsigned int inputNum = 5; - constexpr unsigned int inputChannels = 3; - constexpr unsigned int inputHeight = 32; - constexpr unsigned int inputWidth = 24; - - constexpr unsigned int outputNum = inputNum; - constexpr unsigned int outputChannels = inputChannels; - constexpr unsigned int outputHeight = inputHeight; - constexpr unsigned int outputWidth = inputWidth; - - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - - unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth}; - unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth}; - - inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); - outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); - - LayerTestResult<float,4> ret(outputTensorInfo); - - auto input = MakeRandomTensor<float, 4>(inputTensorInfo, 111234); - - constexpr float alpha = 1.f; - constexpr float beta = 1.f; - constexpr float kappa = 1.f; - constexpr uint32_t normSize = 5; - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::NormalizationQueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - data.m_Parameters.m_NormChannelType = normChannel; - data.m_Parameters.m_NormMethodType = normMethod; - data.m_Parameters.m_NormSize = normSize; - data.m_Parameters.m_Alpha = alpha; - data.m_Parameters.m_Beta = beta; - data.m_Parameters.m_K = kappa; - - std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); - - armnn::NormalizationQueueDescriptor refData = data; - armnn::WorkloadInfo refInfo = info; - SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); - SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); - - // Don't execute if Normalization is not supported for the method and channel types, as an exception will be raised. 
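For the across-channel branch above, the test data has a single channel, so accumulatedScale degenerates to each element's own square and the expected value is x * pow(kappa + alpha * x^2, -beta). With alpha = beta = kappa = 1 and x = 2 this gives 0.4 (standalone check):

#include <cmath>
#include <cstdio>

int main()
{
    const float alpha = 1.f, beta = 1.f, kappa = 1.f;
    const float x = 2.f;                                    // one element of the single-channel input
    const float scale = std::pow(kappa + x * x * alpha, -beta); // (1 + 4)^-1 = 0.2
    std::printf("expected output = %.3f\n", x * scale);     // 0.400
    return 0;
}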
- armnn::Compute compute = workloadFactory.GetCompute(); - const size_t reasonIfUnsupportedMaxLen = 255; - char reasonIfUnsupported[reasonIfUnsupportedMaxLen+1]; - ret.supported = armnn::IsNormalizationSupported(compute, inputTensorInfo, outputTensorInfo, data.m_Parameters, - reasonIfUnsupported, reasonIfUnsupportedMaxLen); - if (!ret.supported) - { - return ret; - } - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateNormalization(data, info); - std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateNormalization(refData, refInfo); - - outputHandleRef->Allocate(); - inputHandleRef->Allocate(); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - refWorkloadFactory.Finalize(); - workloadRef->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); - - return ret; -} - diff --git a/src/armnn/backends/test/PermuteTestImpl.hpp b/src/armnn/backends/test/PermuteTestImpl.hpp deleted file mode 100644 index b49c539b2e..0000000000 --- a/src/armnn/backends/test/PermuteTestImpl.hpp +++ /dev/null @@ -1,225 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include <armnn/ArmNN.hpp> -#include <armnn/Tensor.hpp> -#include <armnn/TypesUtils.hpp> -#include <backends/WorkloadInfo.hpp> - -#include "test/TensorHelpers.hpp" -#include "QuantizeHelper.hpp" - -#include "backends/CpuTensorHandle.hpp" -#include "backends/WorkloadFactory.hpp" - -template<typename T> -LayerTestResult<T, 4> SimplePermuteTestImpl( - armnn::IWorkloadFactory& workloadFactory, - armnn::PermuteDescriptor descriptor, - armnn::TensorInfo inputTensorInfo, - armnn::TensorInfo outputTensorInfo, - const std::vector<T>& inputData, - const std::vector<T>& outputExpectedData) -{ - auto input = MakeTensor<T, 4>(inputTensorInfo, inputData); - - LayerTestResult<T, 4> ret(outputTensorInfo); - ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputExpectedData); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::PermuteQueueDescriptor data; - data.m_Parameters = descriptor; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePermute(data, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - - return ret; -} - -LayerTestResult<float, 4> SimplePermuteFloat32TestCommon(armnn::IWorkloadFactory& workloadFactory) -{ - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - - unsigned int inputShape[] = { 1, 2, 2, 2 }; - unsigned int outputShape[] = { 1, 2, 2, 2 }; - - armnn::PermuteDescriptor descriptor; - descriptor.m_DimMappings = {0U, 3U, 1U, 2U}; - - inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); - outputTensorInfo = armnn::TensorInfo(4, outputShape, 
armnn::DataType::Float32); - - std::vector<float> input = std::vector<float>( - { - 1.0f, 2.0f, - 3.0f, 4.0f, - - 5.0f, 6.0f, - 7.0f, 8.0f - }); - - std::vector<float> outputExpected = std::vector<float>( - { - 1.0f, 5.0f, 2.0f, 6.0f, - 3.0f, 7.0f, 4.0f, 8.0f - }); - - return SimplePermuteTestImpl<float>(workloadFactory, descriptor, inputTensorInfo, - outputTensorInfo, input, outputExpected); -} - -LayerTestResult<uint8_t, 4> SimplePermuteUint8TestCommon(armnn::IWorkloadFactory& workloadFactory) -{ - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - - unsigned int inputShape[] = { 1, 2, 2, 2 }; - unsigned int outputShape[] = { 1, 2, 2, 2 }; - - armnn::PermuteDescriptor descriptor; - descriptor.m_DimMappings = {0U, 3U, 1U, 2U}; - - inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::QuantisedAsymm8); - inputTensorInfo.SetQuantizationScale(1.0f); - outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::QuantisedAsymm8); - outputTensorInfo.SetQuantizationScale(1.0f); - - std::vector<uint8_t> input = std::vector<uint8_t>( - { - 1, 2, - 3, 4, - - 5, 6, - 7, 8 - }); - - std::vector<uint8_t> outputExpected = std::vector<uint8_t>( - { - 1, 5, 2, 6, - 3, 7, 4, 8 - }); - - return SimplePermuteTestImpl<uint8_t>(workloadFactory, descriptor, inputTensorInfo, - outputTensorInfo, input, outputExpected); -} - -LayerTestResult<float, 4> -PermuteFloat32ValueSet1TestCommon(armnn::IWorkloadFactory& workloadFactory) -{ - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - - unsigned int inputShape[] = { 1, 2, 2, 3 }; - unsigned int outputShape[] = { 1, 3, 2, 2 }; - - armnn::PermuteDescriptor descriptor; - descriptor.m_DimMappings = {0U, 2U, 3U, 1U}; - - inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); - outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); - - std::vector<float> input = std::vector<float>( - { - 1.0f, 2.0f, 3.0f, - 11.0f, 12.0f, 13.0f, - 21.0f, 22.0f, 23.0f, - 31.0f, 32.0f, 33.0f, - }); - - std::vector<float> outputExpected = std::vector<float>( - { - 1.0f, 11.0f, 21.0f, 31.0f, - 2.0f, 12.0f, 22.0f, 32.0f, - 3.0f, 13.0f, 23.0f, 33.0f, - }); - - return SimplePermuteTestImpl<float>(workloadFactory, descriptor, inputTensorInfo, - outputTensorInfo, input, outputExpected); -} - -LayerTestResult<float, 4> -PermuteFloat32ValueSet2TestCommon(armnn::IWorkloadFactory& workloadFactory) -{ - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - - unsigned int inputShape[] = { 1, 3, 2, 2 }; - unsigned int outputShape[] = { 1, 2, 2, 3 }; - - armnn::PermuteDescriptor descriptor; - descriptor.m_DimMappings = {0U, 3U, 1U, 2U}; - - inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); - outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); - - std::vector<float> input = std::vector<float>( - { - 1.0f, 11.0f, 21.0f, 31.0f, - 2.0f, 12.0f, 22.0f, 32.0f, - 3.0f, 13.0f, 23.0f, 33.0f, - }); - - std::vector<float> outputExpected = std::vector<float>( - { - 1.0f, 2.0f, 3.0f, - 11.0f, 12.0f, 13.0f, - 21.0f, 22.0f, 23.0f, - 31.0f, 32.0f, 33.0f, - }); - - return SimplePermuteTestImpl<float>(workloadFactory, descriptor, inputTensorInfo, - outputTensorInfo, input, outputExpected); -} - -LayerTestResult<float, 4> -PermuteFloat32ValueSet3TestCommon(armnn::IWorkloadFactory& workloadFactory) -{ - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - - unsigned int inputShape[] = { 1, 2, 3, 3 }; - unsigned 
int outputShape[] = { 1, 3, 2, 3 }; - - armnn::PermuteDescriptor descriptor; - descriptor.m_DimMappings = {0U, 2U, 3U, 1U}; - - inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); - outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); - - std::vector<float> input = std::vector<float>( - { - 1.0f, 2.0f, 3.0f, - 11.0f, 12.0f, 13.0f, - 21.0f, 22.0f, 23.0f, - 31.0f, 32.0f, 33.0f, - 41.0f, 42.0f, 43.0f, - 51.0f, 52.0f, 53.0f, - }); - - std::vector<float> outputExpected = std::vector<float>( - { - 1.0f, 11.0f, 21.0f, 31.0f, 41.0f, 51.0f, - 2.0f, 12.0f, 22.0f, 32.0f, 42.0f, 52.0f, - 3.0f, 13.0f, 23.0f, 33.0f, 43.0f, 53.0f, - }); - - return SimplePermuteTestImpl<float>(workloadFactory, descriptor, inputTensorInfo, - outputTensorInfo, input, outputExpected); -} diff --git a/src/armnn/backends/test/Pooling2dTestImpl.hpp b/src/armnn/backends/test/Pooling2dTestImpl.hpp deleted file mode 100644 index e8c7e86e9d..0000000000 --- a/src/armnn/backends/test/Pooling2dTestImpl.hpp +++ /dev/null @@ -1,1116 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include <armnn/ArmNN.hpp> -#include <armnn/Tensor.hpp> -#include <armnn/TypesUtils.hpp> -#include <backends/WorkloadInfo.hpp> - -#include "test/TensorHelpers.hpp" -#include "QuantizeHelper.hpp" - -#include "backends/CpuTensorHandle.hpp" -#include "backends/WorkloadFactory.hpp" - -#include <algorithm> - -template<typename T> -LayerTestResult<T, 4> SimplePooling2dTestImpl( - armnn::IWorkloadFactory& workloadFactory, - armnn::Pooling2dDescriptor descriptor, - float qScale, - int32_t qOffset, - const boost::multi_array<T, 4>& input, - const boost::multi_array<T, 4>& outputExpected) -{ - unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]); - unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[3]); - unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]); - unsigned int inputBatchSize = boost::numeric_cast<unsigned int>(input.shape()[0]); - - unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]); - unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]); - unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]); - unsigned int outputBatchSize = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]); - - armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, - armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, - armnn::GetDataType<T>()); - - // Set quantization parameters if the requested type is a quantized type. 
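In the permute tests above, m_DimMappings[i] gives the destination position of source dimension i, so {0, 3, 1, 2} sends the channel dimension to the last axis (NCHW to NHWC). That reading is inferred from the expected data rather than taken from an armnn header; the first test's 1x2x2x2 case reproduces it directly:

#include <cstdio>

int main()
{
    // Source tensor is NCHW = {1, 2, 2, 2} with values 1..8, i.e. src[n][c][h][w].
    const float src[1][2][2][2] = { { { { 1, 2 }, { 3, 4 } },
                                      { { 5, 6 }, { 7, 8 } } } };
    // With m_DimMappings = {0, 3, 1, 2} the destination is NHWC: dst[n][h][w][c] = src[n][c][h][w].
    for (int h = 0; h < 2; ++h)
        for (int w = 0; w < 2; ++w)
            for (int c = 0; c < 2; ++c)
                std::printf("%.0f ", src[0][c][h][w]);      // prints 1 5 2 6 3 7 4 8
    std::printf("\n");
    return 0;
}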
- if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - } - - LayerTestResult<T, 4> result(outputTensorInfo); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::Pooling2dQueueDescriptor queueDescriptor; - queueDescriptor.m_Parameters = descriptor; - armnn::WorkloadInfo workloadInfo; - AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get()); - - // Don't execute if Pooling is not supported, as an exception will be raised. - armnn::Compute compute = workloadFactory.GetCompute(); - const size_t reasonIfUnsupportedMaxLen = 255; - char reasonIfUnsupported[reasonIfUnsupportedMaxLen+1]; - result.supported = armnn::IsPooling2dSupported(compute, inputTensorInfo, outputTensorInfo, - queueDescriptor.m_Parameters, - reasonIfUnsupported, reasonIfUnsupportedMaxLen); - if (!result.supported) - { - return result; - } - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePooling2d(queueDescriptor, workloadInfo); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workload->Execute(); - - CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - - result.outputExpected = outputExpected; - - return result; -} - -// -// Tests max pooling with the following parameters: -// -// Pooling size: 3x3 -// Stride: (2,4) -// input size: 8x13 -// channels: 2 -// batch size: 2 -// -template<typename T> -LayerTestResult<T, 4> SimpleMaxPooling2dSize3x3Stride2x4TestCommon(armnn::IWorkloadFactory& workloadFactory, - bool forceNoPadding, - float qScale = 1.0f, - int32_t qOffset = 0) -{ - armnn::Pooling2dDescriptor descriptor; - descriptor.m_PoolType = armnn::PoolingAlgorithm::Max; - descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; - descriptor.m_StrideX = 2; - descriptor.m_StrideY = 4; - // forceNoPadding is mainly used for compatibility with ARM Compute. - // As of 16/05/2017, it errors if padX or padY are equal to or greater than the pool size. - descriptor.m_PadLeft = descriptor.m_PadRight = forceNoPadding ? 0 : 3; - descriptor.m_PadTop = descriptor.m_PadBottom = 0; - descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor; - descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; - - unsigned int inputWidth = 8; - unsigned int inputHeight = 13; - unsigned int outputWidth = - (inputWidth + descriptor.m_PadLeft + descriptor.m_PadRight + descriptor.m_StrideX - descriptor.m_PoolWidth) / - descriptor.m_StrideX; - unsigned int outputHeight = - (inputHeight + descriptor.m_PadTop + descriptor.m_PadBottom + descriptor.m_StrideY - descriptor.m_PoolHeight) / - descriptor.m_StrideY; - unsigned int channels = 2; - unsigned int batchSize = 2; - - armnn::TensorInfo inputTensorInfo({ batchSize, channels, inputHeight, inputWidth }, armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo({ batchSize, channels, outputHeight, outputWidth }, armnn::GetDataType<T>()); - - // Set quantization parameters if the requested type is a quantized type. 
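The output-shape arithmetic used just above for the 3x3, stride (2,4) max-pool test, outputDim = (inputDim + padBefore + padAfter + stride - poolSize) / stride with floor rounding, produces the 3x6 (padded) and 3x3 (forceNoPadding) per-channel blocks listed in the expected data below; a standalone check:

#include <cstdio>

// Floor-rounded pooling output size, mirroring the expressions in the test above.
unsigned int PoolOutDim(unsigned int in, unsigned int padBefore, unsigned int padAfter,
                        unsigned int stride, unsigned int pool)
{
    return (in + padBefore + padAfter + stride - pool) / stride;
}

int main()
{
    std::printf("padded:  %u x %u\n", PoolOutDim(13, 0, 0, 4, 3), PoolOutDim(8, 3, 3, 2, 3)); // 3 x 6
    std::printf("no pad:  %u x %u\n", PoolOutDim(13, 0, 0, 4, 3), PoolOutDim(8, 0, 0, 2, 3)); // 3 x 3
    return 0;
}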
- if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - } - - std::vector<float> singleChannelData({ - 0.0f, 4.0f, 8.0f, 1.0f, 6.0f, 4.0f, 5.0f, 8.0f, - 1.0f, 1.0f, 6.0f, 0.0f, 3.0f, 7.0f, 4.0f, 7.0f, - 8.0f, 5.0f, 0.0f, 0.0f, 8.0f, 3.0f, 4.0f, 3.0f, - 8.0f, 2.0f, 5.0f, 4.0f, 1.0f, 9.0f, 2.0f, 0.0f, - 5.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 7.0f, 2.0f, - 1.0f, 2.0f, 6.0f, 2.0f, 7.0f, 9.0f, 5.0f, 2.0f, - 9.0f, 7.0f, 3.0f, 1.0f, 3.0f, 4.0f, 8.0f, 3.0f, - 1.0f, 0.0f, 0.0f, 5.0f, 5.0f, 4.0f, 2.0f, 0.0f, - 6.0f, 4.0f, 3.0f, 6.0f, 9.0f, 5.0f, 5.0f, 6.0f, - 8.0f, 7.0f, 9.0f, 6.0f, 1.0f, 4.0f, 1.0f, 9.0f, - 7.0f, 1.0f, 9.0f, 2.0f, 9.0f, 9.0f, 8.0f, 1.0f, - 4.0f, 4.0f, 5.0f, 9.0f, 2.0f, 6.0f, 6.0f, 4.0f, - 3.0f, 5.0f, 4.0f, 0.0f, 1.0f, 5.0f, 9.0f, 7.0f, - }); - - // Constructs input data. - std::vector<float> inputData; - auto negator = [](float f) { return -f; }; - - // First image (two channels where the second channel is the negative of the first one). - inputData.insert(inputData.end(), singleChannelData.begin(), singleChannelData.end()); - std::transform(singleChannelData.begin(), singleChannelData.end(), std::back_inserter(inputData), negator); - - // Second image (same as first image). - inputData.insert(inputData.end(), singleChannelData.begin(), singleChannelData.end()); - std::transform(singleChannelData.begin(), singleChannelData.end(), std::back_inserter(inputData), negator); - - auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, inputData)); - - // These were calculated manually. - auto shape(GetTensorShapeAsArray<4>(outputTensorInfo)); - boost::multi_array<T, 4> outputExpected(shape); - if (forceNoPadding) - { - outputExpected = MakeTensor<T, 4>(outputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 8.0f, 8.0f, 8.0f, - 9.0f, 7.0f, 9.0f, - 9.0f, 9.0f, 9.0f, - - 0.0f, 0.0f, -3.0f, - -1.0f, 0.0f, 0.0f, - -1.0f, -1.0f, -1.0f, - - 8.0f, 8.0f, 8.0f, - 9.0f, 7.0f, 9.0f, - 9.0f, 9.0f, 9.0f, - - 0.0f, 0.0f, -3.0f, - -1.0f, 0.0f, 0.0f, - -1.0f, -1.0f, -1.0f - })); - } - else - { - outputExpected = MakeTensor<T, 4>(outputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 0.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, - 0.0f, 9.0f, 7.0f, 9.0f, 9.0f, 3.0f, - 0.0f, 8.0f, 9.0f, 9.0f, 9.0f, 9.0f, - - 0.0f, 0.0f, 0.0f, 0.0f,-3.0f, 0.0f, - 0.0f,-1.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f,-1.0f,-1.0f,-1.0f,-1.0f, 0.0f, - - 0.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, - 0.0f, 9.0f, 7.0f, 9.0f, 9.0f, 3.0f, - 0.0f, 8.0f, 9.0f, 9.0f, 9.0f, 9.0f, - - 0.0f, 0.0f, 0.0f, 0.0f,-3.0f, 0.0f, - 0.0f,-1.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f,-1.0f,-1.0f,-1.0f,-1.0f, 0.0f - })); - } - - return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); -} - -template<typename T> -LayerTestResult<T, 4> SimpleAveragePooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale = 1.0f, - int32_t qOffset = 0) -{ - armnn::Pooling2dDescriptor descriptor; - descriptor.m_PoolType = armnn::PoolingAlgorithm::Average; - descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; - descriptor.m_StrideX = descriptor.m_StrideY = 2; - descriptor.m_PadLeft = 1; - descriptor.m_PadRight = 1; - descriptor.m_PadTop = 1; - descriptor.m_PadBottom = 1; - descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; - - armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); - 
armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3 }, armnn::GetDataType<T>()); - - // Set quantization parameters if the requested type is a quantized type. - if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - } - - auto input = MakeTensor<T, 4>(inputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 1.0f, 2.0f, 3.0f, 4.0f, - 1.0f, 2.0f, 3.0f, 4.0f, - 1.0f, 2.0f, 3.0f, 4.0f, - 1.0f, 2.0f, 3.0f, 4.0f, - })); - - auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 1.0f, 2.5f, 4.0f, - 1.0f, 2.5f, 4.0f, - 1.0f, 2.5f, 4.0f, - })); - - return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); -} - -template<typename T> -LayerTestResult<T, 4> LargeTensorsAveragePooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale = 1.0f, - int32_t qOffset = 0) -{ - armnn::Pooling2dDescriptor descriptor; - descriptor.m_PoolType = armnn::PoolingAlgorithm::Average; - descriptor.m_PoolWidth = descriptor.m_PoolHeight = 100; - descriptor.m_StrideX = descriptor.m_StrideY = 5; - descriptor.m_PadLeft = 50; - descriptor.m_PadRight = 50; - descriptor.m_PadTop = 50; - descriptor.m_PadBottom = 50; - descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; - - armnn::TensorInfo inputTensorInfo({ 5, 3, 52, 60 }, armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo({ 5, 3, 11, 13 }, armnn::GetDataType<T>()); - - // Set quantization parameters if the requested type is a quantized type. - if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - } - - std::vector<T> inputVec; - - for (unsigned int i = 0 ; i < inputTensorInfo.GetShape().GetNumElements(); ++i) - { - inputVec.push_back(1); - } - - auto input = MakeTensor<T, 4>(inputTensorInfo, inputVec); - - std::vector<T> outputVec; - - for (unsigned int i = 0 ; i < outputTensorInfo.GetShape().GetNumElements(); ++i) - { - outputVec.push_back(1); - } - - auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputVec); - - return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); -} - -template<typename T> -LayerTestResult<T, 4> SimpleL2Pooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale = 1.0f, - int32_t qOffset = 0) -{ - armnn::Pooling2dDescriptor descriptor; - descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; - descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; - descriptor.m_StrideX = descriptor.m_StrideY = 2; - descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; - - armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); - auto input = MakeTensor<T, 4>(inputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 1.0f, 7.0f, 1.0f, 7.0f, - 1.0f, 7.0f, 1.0f, 7.0f, - 1.0f, 7.0f, 1.0f, 7.0f, - 1.0f, 7.0f, 1.0f, 7.0f, - })); - - armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2 }, armnn::GetDataType<T>()); - auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 5.0f, 5.0f, - 5.0f, 5.0f, - })); - - return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); -} - -template<typename T> -LayerTestResult<T, 4> 
L2Pooling2dSize3Stride1TestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale = 1.0f, - int32_t qOffset = 0) -{ - armnn::Pooling2dDescriptor descriptor; - descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; - descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; - descriptor.m_StrideX = descriptor.m_StrideY = 1; - descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; - - armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); - auto input = MakeTensor<T, 4>(inputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 2.0f, 1.0f, 5.0f, 2.0f, - 1.0f, 2.0f, 2.0f, 1.0f, - 5.0f, 4.0f, 1.0f, 5.0f, - 2.0f, 1.0f, 5.0f, 2.0f, - })); - - armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2 }, armnn::GetDataType<T>()); - auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 3.0f, 3.0f, - 3.0f, 3.0f, - })); - - return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); -} - -template<typename T> -LayerTestResult<T, 4> L2Pooling2dSize3Stride3TestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale = 1.0f, - int32_t qOffset = 0) -{ - armnn::Pooling2dDescriptor descriptor; - descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; - descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; - descriptor.m_StrideX = descriptor.m_StrideY = 3; - descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; - - armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9 }, armnn::GetDataType<T>()); - auto input = MakeTensor<T, 4>(inputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, - 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, - 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, - 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, - 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, - 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, - 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, - 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, - 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, - })); - - armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3 }, armnn::GetDataType<T>()); - auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 3.0f, 3.0f, 3.0f, - 3.0f, 3.0f, 3.0f, - 3.0f, 3.0f, 3.0f, - })); - - return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); -} - -template<typename T> -LayerTestResult<T, 4> L2Pooling2dSize3Stride4TestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale = 1.0f, - int32_t qOffset = 0) -{ - armnn::Pooling2dDescriptor descriptor; - descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; - descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; - descriptor.m_StrideX = descriptor.m_StrideY = 4; - descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; - - armnn::TensorInfo inputTensorInfo({ 1, 1, 7, 7 }, armnn::GetDataType<T>()); - auto input = MakeTensor<T, 4>(inputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 2.0f, 1.0f, 5.0f, 0.0f, 2.0f, 1.0f, 5.0f, - 1.0f, 2.0f, 2.0f, 0.0f, 1.0f, 2.0f, 2.0f, - 5.0f, 4.0f, 1.0f, 0.0f, 5.0f, 4.0f, 1.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 2.0f, 1.0f, 5.0f, 0.0f, 2.0f, 1.0f, 5.0f, - 1.0f, 2.0f, 2.0f, 0.0f, 1.0f, 2.0f, 2.0f, - 5.0f, 4.0f, 1.0f, 0.0f, 5.0f, 4.0f, 1.0f, - })); - - armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2 }, armnn::GetDataType<T>()); - auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, - 
QuantizedVector<T>(qScale, qOffset, { - 3.0f, 3.0f, - 3.0f, 3.0f, - })); - - return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); -} - -template<typename T> -LayerTestResult<T, 4> L2Pooling2dSize7TestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale = 1.0f, - int32_t qOffset = 0) -{ - armnn::Pooling2dDescriptor descriptor; - descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; - descriptor.m_PoolWidth = descriptor.m_PoolHeight = 7; - descriptor.m_StrideX = descriptor.m_StrideY = 7; - descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; - - armnn::TensorInfo inputTensorInfo({ 1, 1, 7, 7 }, armnn::GetDataType<T>()); - auto input = MakeTensor<T, 4>(inputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 1.0f, 0.0f, 2.0f, 0.0f, 3.0f, 0.0f, 4.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 5.0f, 0.0f, 6.0f, 0.0f, 7.0f, 0.0f, - 8.0f, 0.0f, 9.0f, 0.0f, 10.0f, 0.0f, 5.0f, - 0.0f, 5.0f, 0.0f, 2.0f, 0.0f, 1.0f, 1.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - })); - - armnn::TensorInfo outputTensorInfo({ 1, 1, 1, 1 }, armnn::GetDataType<T>()); - auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 3.0f, - })); - - return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); -} - -template<typename T> -LayerTestResult<T, 4> L2Pooling2dSize9TestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale = 1.0f, - int32_t qOffset = 0) -{ - armnn::Pooling2dDescriptor descriptor; - descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; - descriptor.m_PoolWidth = descriptor.m_PoolHeight = 9; - descriptor.m_StrideX = descriptor.m_StrideY = 9; - descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; - - armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9 }, armnn::GetDataType<T>()); - auto input = MakeTensor<T, 4>(inputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, - 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, - 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, - 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, - 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, - 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, - 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, - 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, - 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, - })); - - armnn::TensorInfo outputTensorInfo({ 1, 1, 1, 1 }, armnn::GetDataType<T>()); - auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 3.0f, - })); - - return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); -} - -template<typename T> -LayerTestResult<T, 4> AsymmetricNonSquarePooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale = 1.0f, - int32_t qOffset = 0) -{ - armnn::TensorInfo inputTensorInfo({ 1, 1, 1, 3 }, armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2 }, armnn::GetDataType<T>()); - - armnn::Pooling2dDescriptor descriptor; - descriptor.m_PoolType = armnn::PoolingAlgorithm::Max; - descriptor.m_PoolWidth = 2; - descriptor.m_PoolHeight = 3; - descriptor.m_StrideX = 2; - descriptor.m_StrideY = 1; - descriptor.m_PadLeft = 2; - descriptor.m_PadRight = 0; - descriptor.m_PadTop = 1; - descriptor.m_PadBottom = 2; - descriptor.m_OutputShapeRounding = 
armnn::OutputShapeRounding::Floor; - descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; - - // Construct input data. - auto input = MakeTensor<T, 4>(inputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 1.0f, 3.0f, 4.0f, - })); - - // These were calculated manually. - auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 0.0f, 3.0f, 0.0f, 3.0f, - })); - - return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); -} - -template<typename T> -LayerTestResult<T, 4> ComparePooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory, - armnn::PoolingAlgorithm poolingType, - float qScale = 1.0f, - int32_t qOffset = 0) -{ - const unsigned int inputWidth = 16; - const unsigned int inputHeight = 32; - const unsigned int channelCount = 2; - const unsigned int batchSize = 5; - - const unsigned int poolSize = 3; - const unsigned int strideX = 2; - const unsigned int strideY = 4; - const unsigned int padX = 0; - const unsigned int padY = 0; - - const unsigned int outputWidth = (inputWidth + 2 * padX + strideX - poolSize) / strideX; - const unsigned int outputHeight = (inputHeight + 2 * padY + strideY - poolSize) / strideY; - - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - - unsigned int inputShape[] = { batchSize, channelCount, inputHeight, inputWidth }; - unsigned int outputShape[] = { batchSize, channelCount, outputHeight, outputWidth }; - - inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType<T>()); - outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::GetDataType<T>()); - - // Set quantization parameters if the requested type is a quantized type. - if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - } - - boost::multi_array<T, 4> input = MakeRandomTensor<T, 4>(inputTensorInfo, 81715); - - LayerTestResult<T, 4> comparisonResult(outputTensorInfo); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::Pooling2dQueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - data.m_Parameters.m_PoolType = poolingType; - data.m_Parameters.m_PoolWidth = poolSize; - data.m_Parameters.m_PoolHeight = poolSize; - data.m_Parameters.m_StrideX = strideX; - data.m_Parameters.m_StrideY = strideY; - data.m_Parameters.m_PadLeft = padX; - data.m_Parameters.m_PadRight = padX; - data.m_Parameters.m_PadTop = padY; - data.m_Parameters.m_PadBottom = padY; - data.m_Parameters.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor; - - std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); - - // Don't execute if Pooling is not supported, as an exception will be raised. 
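The L2 pooling tests above all check the same rule: each output is the square root of the mean of the squared values in the window. For the SimpleL2Pooling2d data every 2x2 window holds two 1s and two 7s, which gives exactly the expected 5.0 (standalone check):

#include <cmath>
#include <cstdio>

int main()
{
    const float window[4] = { 1.f, 7.f, 1.f, 7.f };         // one 2x2 window of the SimpleL2Pooling2d input
    float sumSquares = 0.f;
    for (float v : window) { sumSquares += v * v; }          // 1 + 49 + 1 + 49 = 100
    std::printf("L2 pool output = %.1f\n", std::sqrt(sumSquares / 4.f)); // 5.0, as in the expected tensor
    return 0;
}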
- armnn::Compute compute = workloadFactory.GetCompute(); - const size_t reasonIfUnsupportedMaxLen = 255; - char reasonIfUnsupported[reasonIfUnsupportedMaxLen+1]; - comparisonResult.supported = armnn::IsPooling2dSupported(compute, inputTensorInfo, outputTensorInfo, - data.m_Parameters, - reasonIfUnsupported, reasonIfUnsupportedMaxLen); - if (!comparisonResult.supported) - { - return comparisonResult; - } - - armnn::Pooling2dQueueDescriptor refData = data; - armnn::WorkloadInfo refInfo = info; - SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); - SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePooling2d(data, info); - std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreatePooling2d(refData, refInfo); - - outputHandleRef->Allocate(); - inputHandleRef->Allocate(); - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); - - workload->Execute(); - workloadRef->Execute(); - - CopyDataFromITensorHandle(&comparisonResult.output[0][0][0][0], outputHandle.get()); - CopyDataFromITensorHandle(&comparisonResult.outputExpected[0][0][0][0], outputHandleRef.get()); - - return comparisonResult; -} - -// -// Tests max pooling with the following parameters: -// -// Pooling size: 2x2 -// Stride: (2,2) -// input size: 4x4 -// channels: 1 -// batch size: 1 -// -template<typename T> -LayerTestResult<T, 4> SimpleMaxPooling2dSize2x2Stride2x2TestCommon(armnn::IWorkloadFactory& workloadFactory, - bool forceNoPadding, - float qScale = 1.0f, - int32_t qOffset = 0) -{ - armnn::Pooling2dDescriptor descriptor; - descriptor.m_PoolType = armnn::PoolingAlgorithm::Max; - descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; - descriptor.m_StrideX = 2; - descriptor.m_StrideY = 2; - descriptor.m_PadLeft = descriptor.m_PadRight = forceNoPadding ? 0 : 3; - descriptor.m_PadTop = descriptor.m_PadBottom = 0; - descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor; - descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; - - unsigned int inputWidth = 4; - unsigned int inputHeight = 4; - unsigned int outputWidth = - (inputWidth + descriptor.m_PadLeft + descriptor.m_PadRight + descriptor.m_StrideX - descriptor.m_PoolWidth) / - descriptor.m_StrideX; - unsigned int outputHeight = - (inputHeight + descriptor.m_PadTop + descriptor.m_PadBottom + descriptor.m_StrideY - descriptor.m_PoolHeight) / - descriptor.m_StrideY; - unsigned int channels = 1; - unsigned int batchSize = 1; - - std::vector<float> inputData = { - 510.0f, 222.0f, 780.0f, 654.0f, - 141.0f, 276.0f, 15.0f, 546.0f, - 303.0f, 618.0f, 582.0f, 339.0f, - 438.0f, 564.0f, 573.0f, 402.0f - }; - - // Note that left and right edges will be 0.f, due to the 2x2 max pooling only accessing zeros here. - std::vector<float> expectedOutputDataWithPadding = { - 0.0f, 510.0f, 780.0f, 654.0f, 0.0f, - 0.0f, 438.0f, 618.0f, 402.0f, 0.0f - }; - - std::vector<float> expectedOutputDataNoPadding = { - 510.0f, 780.0f, - 618.0f, 582.0f - }; - - armnn::TensorInfo inputTensorInfo({ batchSize, channels, inputHeight, inputWidth }, armnn::GetDataType<T>()); - - // Scale and offset should match input - we're just calculating maximum values. 
- armnn::TensorInfo outputTensorInfo({ batchSize, channels, outputHeight, outputWidth }, armnn::GetDataType<T>()); - - // Set quantization parameters if the requested type is a quantized type. - if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - } - - auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, inputData)); - - auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, - forceNoPadding ? QuantizedVector<T>(qScale, qOffset, expectedOutputDataNoPadding) : - QuantizedVector<T>(qScale, qOffset, expectedOutputDataWithPadding)); - - return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); -} - -// -// Tests max pooling with the following parameters: -// -// Pooling size: 3x2 -// Stride: (2,2) -// input size: 3x2 -// channels: 1 -// batch size: 1 -// -template<typename T> -LayerTestResult<T, 4> IgnorePaddingAveragePooling2dSize3x2Stride2x2TestCommon( - armnn::IWorkloadFactory& workloadFactory, - bool forceNoPadding, - float qScale = 1.0f, - int32_t qOffset = 0) -{ - armnn::Pooling2dDescriptor descriptor; - descriptor.m_PoolType = armnn::PoolingAlgorithm::Average; - descriptor.m_PoolWidth = 3; - descriptor.m_PoolHeight = 2; - descriptor.m_StrideX = 2; - descriptor.m_StrideY = 2; - descriptor.m_PadLeft = (forceNoPadding) ? 0 : 1; - descriptor.m_PadRight = descriptor.m_PadLeft; - descriptor.m_PadTop = 0; - descriptor.m_PadBottom = 0; - descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor; - descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; - - unsigned int inputWidth = 3; - unsigned int inputHeight = 2; - unsigned int outputWidth = - (inputWidth + descriptor.m_PadLeft + descriptor.m_PadRight + descriptor.m_StrideX - descriptor.m_PoolWidth) / - descriptor.m_StrideX; - unsigned int outputHeight = - (inputHeight + descriptor.m_PadTop + descriptor.m_PadBottom + descriptor.m_StrideY - descriptor.m_PoolHeight) / - descriptor.m_StrideY; - unsigned int channels = 1; - unsigned int batchSize = 1; - - std::vector<float> inputData = { - 3.0f, 6.0f, 9.0f, - 12.0f, 15.0f, 18.0f, - }; - - std::vector<float> expectedOutputDataWithPadding = { - 6.0f, 8.0f, - }; - - std::vector<float> expectedOutputDataNoPadding = { - 10.5f, - }; - - armnn::TensorInfo inputTensorInfo({ batchSize, channels, inputHeight, inputWidth }, armnn::GetDataType<T>()); - - // Scale and offset should match input - we're just calculating average values. - armnn::TensorInfo outputTensorInfo({ batchSize, channels, outputHeight, outputWidth }, armnn::GetDataType<T>()); - - // Set quantization parameters if the requested type is a quantized type. - if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - } - - auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, inputData)); - - auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, - forceNoPadding ? 
QuantizedVector<T>(qScale, qOffset, expectedOutputDataNoPadding) : - QuantizedVector<T>(qScale, qOffset, expectedOutputDataWithPadding)); - - return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); -} - - -template<typename T> -LayerTestResult<T, 4> IgnorePaddingSimpleMaxPooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale = 1.0f, - int32_t qOffset = 0) -{ - armnn::Pooling2dDescriptor descriptor; - descriptor.m_PoolType = armnn::PoolingAlgorithm::Max; - descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; - descriptor.m_StrideX = descriptor.m_StrideY = 2; - descriptor.m_PadLeft = 1; - descriptor.m_PadRight = 1; - descriptor.m_PadTop = 1; - descriptor.m_PadBottom = 1; - descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; - - armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3 }, armnn::GetDataType<T>()); - - // Set quantization parameters if the requested type is a quantized type. - if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - } - - auto input = MakeTensor<T, 4>(inputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - -1.0f, -2.0f, 3.0f, 4.0f, - -1.0f, -2.0f, 3.0f, 4.0f, - 1.0f, 2.0f, -3.0f, -4.0f, - 1.0f, 2.0f, -3.0f, -4.0f, - })); - - auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - -1.0f, 3.0f, 4.0f, - 1.0f, 3.0f, 4.0f, - 1.0f, 2.0f, -4.0f, - })); - - return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); -} - -template<typename T> -LayerTestResult<T, 4> IgnorePaddingMaxPooling2dSize3TestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale = 1.0f, - int32_t qOffset = 0) -{ - armnn::Pooling2dDescriptor descriptor; - descriptor.m_PoolType = armnn::PoolingAlgorithm::Max; - descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; - descriptor.m_StrideX = descriptor.m_StrideY = 1; - descriptor.m_PadLeft = 1; - descriptor.m_PadRight = 1; - descriptor.m_PadTop = 1; - descriptor.m_PadBottom = 1; - descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; - - armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); - - // Set quantization parameters if the requested type is a quantized type. 
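The 3x2 average-pooling test above makes the padding-method distinction concrete: with PaddingMethod::IgnoreValue the zero padding is averaged in, so it still counts toward the divisor. Recomputing the first padded window (6.0) and the single unpadded window (10.5) as a standalone check:

#include <cstdio>

int main()
{
    // First padded window (padLeft = 1): covers {0, 3, 6} over {0, 12, 15}.
    const float padded[6] = { 0.f, 3.f, 6.f, 0.f, 12.f, 15.f };
    // forceNoPadding case: the single window is the whole 3x2 input.
    const float full[6] = { 3.f, 6.f, 9.f, 12.f, 15.f, 18.f };

    float sumPadded = 0.f, sumFull = 0.f;
    for (float v : padded) { sumPadded += v; }
    for (float v : full)   { sumFull += v; }

    std::printf("IgnoreValue, padded window: %.1f\n", sumPadded / 6.f); // 6.0
    std::printf("no padding, full window:    %.1f\n", sumFull / 6.f);   // 10.5
    return 0;
}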
- if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - } - - auto input = MakeTensor<T, 4>(inputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - -1.0f, -2.0f, 3.0f, 4.0f, - -1.0f, -2.0f, 3.0f, 4.0f, - 1.0f, 2.0f, -3.0f, -4.0f, - 1.0f, 2.0f, -3.0f, -4.0f, - })); - - auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - -1.0f, 3.0f, 4.0f, 4.0f, - 2.0f, 3.0f, 4.0f, 4.0f, - 2.0f, 3.0f, 4.0f, 4.0f, - 2.0f, 2.0f, 2.0f, -3.0f, - })); - - return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); -} - -template<typename T> -LayerTestResult<T, 4> IgnorePaddingSimpleAveragePooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale = 1.0f, - int32_t qOffset = 0) -{ - armnn::Pooling2dDescriptor descriptor; - descriptor.m_PoolType = armnn::PoolingAlgorithm::Average; - descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; - descriptor.m_StrideX = descriptor.m_StrideY = 2; - descriptor.m_PadLeft = 1; - descriptor.m_PadRight = 1; - descriptor.m_PadTop = 1; - descriptor.m_PadBottom = 1; - descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; - - armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3 }, armnn::GetDataType<T>()); - - // Set quantization parameters if the requested type is a quantized type. - if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - } - - auto input = MakeTensor<T, 4>(inputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 12.0f, 20.0f, 32.0f, 40.0f, - 12.0f, 20.0f, 32.0f, 40.0f, - 12.0f, 20.0f, 32.0f, 40.0f, - 12.0f, 20.0f, 32.0f, 40.0f, - })); - - auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 3.0f, 13.0f, 10.0f, - 6.0f, 26.0f, 20.0f, - 3.0f, 13.0f, 10.0f, - })); - - return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); -} - -template<typename T> -LayerTestResult<T, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale = 1.0f, - int32_t qOffset = 0) -{ - armnn::Pooling2dDescriptor descriptor; - descriptor.m_PoolType = armnn::PoolingAlgorithm::Average; - descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; - descriptor.m_StrideX = descriptor.m_StrideY = 2; - descriptor.m_PadLeft = 0; - descriptor.m_PadRight = 0; - descriptor.m_PadTop = 0; - descriptor.m_PadBottom = 0; - descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; - descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Ceiling; - - armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4}, armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2 }, armnn::GetDataType<T>()); - - // Set quantization parameters if the requested type is a quantized type. 
- if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - } - - auto input = MakeTensor<T, 4>(inputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 1.0f, 2.0f, 3.0f, 4.0f, - 1.0f, 2.0f, 3.0f, 4.0f, - 1.0f, 2.0f, 3.0f, 4.0f, - 1.0f, 2.0f, 3.0f, 4.0f, - })); - - auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 2.0f, 3.5f, - 2.0f, 3.5f - })); - - return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); -} - -template<typename T> -LayerTestResult<T, 4> IgnorePaddingAveragePooling2dSize3TestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale = 1.0f, - int32_t qOffset = 0) -{ - armnn::Pooling2dDescriptor descriptor; - descriptor.m_PoolType = armnn::PoolingAlgorithm::Average; - descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; - descriptor.m_StrideX = descriptor.m_StrideY = 1; - descriptor.m_PadLeft = 1; - descriptor.m_PadRight = 1; - descriptor.m_PadTop = 1; - descriptor.m_PadBottom = 1; - descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; - - armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); - - // Set quantization parameters if the requested type is a quantized type. - if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - } - - auto input = MakeTensor<T, 4>(inputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 9.0f, 27.0f, 18.0f, 36.0f, - 18.0f, 9.0f, 18.0f, 9.0f, - 27.0f, 18.0f, 9.0f, 27.0f, - 9.0f, 27.0f, 9.0f, 18.0f, - })); - - auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 7.0f, 11.0f, 13.0f, 9.0f, - 12.0f, 17.0f, 19.0f, 13.0f, - 12.0f, 16.0f, 16.0f, 10.0f, - 9.0f, 11.0f, 12.0f, 7.0f, - })); - - return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); -} - -template<typename T> -LayerTestResult<T, 4> IgnorePaddingSimpleL2Pooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale = 1.0f, - int32_t qOffset = 0) -{ - armnn::Pooling2dDescriptor descriptor; - descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; - descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; - descriptor.m_StrideX = descriptor.m_StrideY = 2; - descriptor.m_PadLeft = 1; - descriptor.m_PadRight = 1; - descriptor.m_PadTop = 1; - descriptor.m_PadBottom = 1; - descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; - - armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3 }, armnn::GetDataType<T>()); - - // Set quantization parameters if the requested type is a quantized type. 
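// Editor's note (annotation, not part of the original diff): the expected values in the
// L2 pooling tests below are consistent with the usual definition of L2 pooling, i.e. the
// square root of the mean of the squares over the window, with IgnoreValue padding
// contributing zeros to the sum:
//
//     out = sqrt((x0*x0 + x1*x1 + ... + x(N-1)*x(N-1)) / N)
//
// For the 2x2 case that follows, the top-left window covers only the input value 2.0f plus
// three padding zeros, giving sqrt(4.0f / 4) = 1.0f, which matches the first expected output.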
- if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - } - - auto input = MakeTensor<T, 4>(inputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 2.0f, 4.0f, 8.0f, 16.0f, - 4.0f, 2.0f, 2.0f, 4.0f, - 8.0f, 2.0f, 4.0f, 2.0f, - 16.0f, 2.0f, 2.0f, 8.0f, - })); - - auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 1.0f, 4.4721f, 8.0f, - 4.4721f, 2.6457f, 2.236f, - 8.0f, 1.4142f, 4.0f, - })); - - return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); -} - -template<typename T> -LayerTestResult<T, 4> IgnorePaddingL2Pooling2dSize3TestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale = 1.0f, - int32_t qOffset = 0) -{ - armnn::Pooling2dDescriptor descriptor; - descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; - descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; - descriptor.m_StrideX = descriptor.m_StrideY = 1; - descriptor.m_PadLeft = 1; - descriptor.m_PadRight = 1; - descriptor.m_PadTop = 1; - descriptor.m_PadBottom = 1; - descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; - - armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); - - // Set quantization parameters if the requested type is a quantized type. - if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - } - - auto input = MakeTensor<T, 4>(inputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 1.0f, 2.0f, 3.0f, 4.0f, - 1.0f, 2.0f, 3.0f, 4.0f, - 1.0f, 2.0f, 3.0f, 4.0f, - 1.0f, 2.0f, 3.0f, 4.0f, - })); - - auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, - QuantizedVector<T>(qScale, qOffset, { - 1.0540f, 1.7638f, 2.5385f, 2.3570f, - 1.2909f, 2.1602f, 3.1091f, 2.8867f, - 1.2909f, 2.1602f, 3.1091f, 2.8867f, - 1.0540f, 1.7638f, 2.5385f, 2.3570f, - })); - - return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); -} diff --git a/src/armnn/backends/test/QuantizeHelper.hpp b/src/armnn/backends/test/QuantizeHelper.hpp deleted file mode 100644 index bb4e561d59..0000000000 --- a/src/armnn/backends/test/QuantizeHelper.hpp +++ /dev/null @@ -1,91 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#pragma once - -#include <armnn/ArmNN.hpp> -#include <armnn/TypesUtils.hpp> - -#include <initializer_list> -#include <iterator> -#include <vector> -#include <boost/core/ignore_unused.hpp> - -template<typename T, bool DoQuantize=true> -struct SelectiveQuantizer -{ - static T Quantize(float value, float scale, int32_t offset) - { - return armnn::Quantize<T>(value, scale, offset); - } - - static float Dequantize(T value, float scale, int32_t offset) - { - return armnn::Dequantize(value, scale, offset); - } -}; - -template<typename T> -struct SelectiveQuantizer<T, false> -{ - static T Quantize(float value, float scale, int32_t offset) - { - boost::ignore_unused(scale, offset); - return value; - } - - static float Dequantize(T value, float scale, int32_t offset) - { - boost::ignore_unused(scale, offset); - return value; - } -}; - -template<typename T> -T SelectiveQuantize(float value, float scale, int32_t offset) -{ - return SelectiveQuantizer<T, armnn::IsQuantizedType<T>()>::Quantize(value, scale, offset); -}; - -template<typename T> -float SelectiveDequantize(T value, float scale, int32_t offset) -{ - return SelectiveQuantizer<T, armnn::IsQuantizedType<T>()>::Dequantize(value, scale, offset); -}; - -template<typename ItType> -struct IsFloatingPointIterator -{ - static constexpr bool value=std::is_floating_point<typename std::iterator_traits<ItType>::value_type>::value; -}; - -template <typename T, typename FloatIt, -typename std::enable_if<IsFloatingPointIterator<FloatIt>::value, int>::type=0 // Makes sure fp iterator is valid. -> -std::vector<T> QuantizedVector(float qScale, int32_t qOffset, FloatIt first, FloatIt last) -{ - std::vector<T> quantized; - quantized.reserve(boost::numeric_cast<size_t>(std::distance(first, last))); - - for (auto it = first; it != last; ++it) - { - auto f = *it; - T q =SelectiveQuantize<T>(f, qScale, qOffset); - quantized.push_back(q); - } - - return quantized; -} - -template<typename T> -std::vector<T> QuantizedVector(float qScale, int32_t qOffset, const std::vector<float>& array) -{ - return QuantizedVector<T>(qScale, qOffset, array.begin(), array.end()); -} - -template<typename T> -std::vector<T> QuantizedVector(float qScale, int32_t qOffset, std::initializer_list<float> array) -{ - return QuantizedVector<T>(qScale, qOffset, array.begin(), array.end()); -} diff --git a/src/armnn/backends/test/Reference.cpp b/src/armnn/backends/test/Reference.cpp deleted file mode 100644 index 62786a9ec4..0000000000 --- a/src/armnn/backends/test/Reference.cpp +++ /dev/null @@ -1,253 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#include <boost/test/unit_test.hpp> - -#include "LayerTests.hpp" -#include "test/TensorHelpers.hpp" - -#include "backends/RefWorkloadFactory.hpp" - -#include "test/UnitTests.hpp" - -BOOST_AUTO_TEST_SUITE(Compute_Reference) -using FactoryType = armnn::RefWorkloadFactory; - -// ============================================================================ -// UNIT tests - -// Convolution -ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x5, SimpleConvolution2d3x5Test, true) -ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x5Uint8, SimpleConvolution2d3x5Uint8Test, true) - -ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2d, SimpleConvolution2d3x5Test, false) -ARMNN_AUTO_TEST_CASE(UnbiasedConvolutionUint8, SimpleConvolution2d3x5Uint8Test, false) - -ARMNN_AUTO_TEST_CASE(SimpleConvolution1d, Convolution1dTest, true) -ARMNN_AUTO_TEST_CASE(SimpleConvolution1dUint8, Convolution1dUint8Test, true) - -ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x3, SimpleConvolution2d3x3Test, true) -ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x3Uint8, SimpleConvolution2d3x3Uint8Test, true) - -ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2dSquare, SimpleConvolution2d3x3Test, false) - -ARMNN_AUTO_TEST_CASE(SimpleConvolution2dAsymmetricPaddingLargerThanHalfKernelSize, - Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest) -ARMNN_AUTO_TEST_CASE(SimpleConvolution2dAsymmetricPadding, Convolution2dAsymmetricPaddingTest) - -// Depthwise Convolution -ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2d, DepthwiseConvolution2dTest, true) -ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dUint8, DepthwiseConvolution2dUint8Test, true) - -ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2d, DepthwiseConvolution2dTest, false) -ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dUint8, DepthwiseConvolution2dUint8Test, false) - -ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, true) -ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, true) - -ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, false) -ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false) - -ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, true) -ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, false) - -// Pooling -ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize2x2Stride2x2, SimpleMaxPooling2dSize2x2Stride2x2Test, false) -ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize2x2Stride2x2Uint8, SimpleMaxPooling2dSize2x2Stride2x2Uint8Test, false) - -ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4, SimpleMaxPooling2dSize3x3Stride2x4Test, false) -ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4Uint8, SimpleMaxPooling2dSize3x3Stride2x4Uint8Test, false) - -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2d, IgnorePaddingSimpleMaxPooling2dTest) -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2dUint8, IgnorePaddingSimpleMaxPooling2dUint8Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3, IgnorePaddingMaxPooling2dSize3Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3Uint8, IgnorePaddingMaxPooling2dSize3Uint8Test) - -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2d, IgnorePaddingSimpleAveragePooling2dTest) -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dUint8, IgnorePaddingSimpleAveragePooling2dUint8Test) 
-ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPadding, IgnorePaddingSimpleAveragePooling2dNoPaddingTest) -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPaddingUint8, - IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3, IgnorePaddingAveragePooling2dSize3Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3Uint8, IgnorePaddingAveragePooling2dSize3Uint8Test) - -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleL2Pooling2d, IgnorePaddingSimpleL2Pooling2dTest) -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleL2Pooling2dUint8, IgnorePaddingSimpleL2Pooling2dUint8Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingL2Pooling2dSize3, IgnorePaddingL2Pooling2dSize3Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingL2Pooling2dSize3Uint8, IgnorePaddingL2Pooling2dSize3Uint8Test) - -ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest) -ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2, - IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, false) -ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2NoPadding, - IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, true) - -ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest) -ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test) - -ARMNN_AUTO_TEST_CASE(SimpleL2Pooling2d, SimpleL2Pooling2dTest) -ARMNN_AUTO_TEST_CASE(SimpleL2Pooling2dUint8, SimpleL2Pooling2dUint8Test) - -ARMNN_AUTO_TEST_CASE(L2Pooling2dSize7, L2Pooling2dSize7Test) -ARMNN_AUTO_TEST_CASE(L2Pooling2dSize7Uint8, L2Pooling2dSize7Uint8Test) - -ARMNN_AUTO_TEST_CASE(AsymmNonSquarePooling2d, AsymmetricNonSquarePooling2dTest) -ARMNN_AUTO_TEST_CASE(AsymmNonSquarePooling2dUint8, AsymmetricNonSquarePooling2dUint8Test) - -// Activation -ARMNN_AUTO_TEST_CASE(ConstantLinearActivation, ConstantLinearActivationTest) -ARMNN_AUTO_TEST_CASE(ConstantLinearActivationUint8, ConstantLinearActivationUint8Test) - -ARMNN_AUTO_TEST_CASE(SimpleNormalizationAcross, SimpleNormalizationAcrossTest) -ARMNN_AUTO_TEST_CASE(SimpleNormalizationWithin, SimpleNormalizationWithinTest) - -ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1, SimpleSoftmaxTest, 1.0f) -ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2, SimpleSoftmaxTest, 2.0f) -ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1Uint8, SimpleSoftmaxUint8Test, 1.0f) -ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2Uint8, SimpleSoftmaxUint8Test, 2.0f) - -ARMNN_AUTO_TEST_CASE(SimpleSigmoid, SimpleSigmoidTest) -ARMNN_AUTO_TEST_CASE(SimpleSigmoidUint8, SimpleSigmoidUint8Test) - -ARMNN_AUTO_TEST_CASE(ReLu1, BoundedReLuUpperAndLowerBoundTest) -ARMNN_AUTO_TEST_CASE(ReLu6, BoundedReLuUpperBoundOnlyTest) -ARMNN_AUTO_TEST_CASE(ReLu1Uint8, BoundedReLuUint8UpperAndLowerBoundTest) -ARMNN_AUTO_TEST_CASE(ReLu6Uint8, BoundedReLuUint8UpperBoundOnlyTest) - -// Fully Conected -ARMNN_AUTO_TEST_CASE(SimpleFullyConnected, FullyConnectedFloat32Test, false, false) -ARMNN_AUTO_TEST_CASE(FullyConnectedUint8, FullyConnectedUint8Test, false) -ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithBias, FullyConnectedFloat32Test, true, false) -ARMNN_AUTO_TEST_CASE(FullyConnectedBiasedUint8, FullyConnectedUint8Test, true) -ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithTranspose, FullyConnectedFloat32Test, false, true) - -ARMNN_AUTO_TEST_CASE(FullyConnectedLarge, FullyConnectedLargeTest, false) -ARMNN_AUTO_TEST_CASE(FullyConnectedLargeTransposed, FullyConnectedLargeTest, true) - -// Splitter 
-ARMNN_AUTO_TEST_CASE(SimpleSplitter, SplitterTest) -ARMNN_AUTO_TEST_CASE(SimpleSplitterUint8, SplitterUint8Test) - -ARMNN_AUTO_TEST_CASE(CopyViaSplitter, CopyViaSplitterTest) -ARMNN_AUTO_TEST_CASE(CopyViaSplitterUint8, CopyViaSplitterUint8Test) - -// Merger -ARMNN_AUTO_TEST_CASE(SimpleMerger, MergerTest) -ARMNN_AUTO_TEST_CASE(MergerUint8, MergerUint8Test) - -// Add -ARMNN_AUTO_TEST_CASE(SimpleAdd, AdditionTest) -ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest) -ARMNN_AUTO_TEST_CASE(AddBroadcast, AdditionBroadcastTest) - -ARMNN_AUTO_TEST_CASE(AdditionUint8, AdditionUint8Test) -ARMNN_AUTO_TEST_CASE(AddBroadcastUint8, AdditionBroadcastUint8Test) -ARMNN_AUTO_TEST_CASE(AddBroadcast1ElementUint8, AdditionBroadcast1ElementUint8Test) - -// Sub -ARMNN_AUTO_TEST_CASE(SimpleSub, SubtractionTest) -ARMNN_AUTO_TEST_CASE(SubBroadcast1Element, SubtractionBroadcast1ElementTest) -ARMNN_AUTO_TEST_CASE(SubBroadcast, SubtractionBroadcastTest) - -ARMNN_AUTO_TEST_CASE(SubtractionUint8, SubtractionUint8Test) -ARMNN_AUTO_TEST_CASE(SubBroadcastUint8, SubtractionBroadcastUint8Test) -ARMNN_AUTO_TEST_CASE(SubBroadcast1ElementUint8, SubtractionBroadcast1ElementUint8Test) - -// Div -ARMNN_AUTO_TEST_CASE(SimpleDivision, DivisionTest) -ARMNN_AUTO_TEST_CASE(DivisionByZero, DivisionByZeroTest) -ARMNN_AUTO_TEST_CASE(DivisionBroadcast1Element, DivisionBroadcast1ElementTest) -ARMNN_AUTO_TEST_CASE(DivisionBroadcast1DVector, DivisionBroadcast1DVectorTest) -// NOTE: division by zero for quantized div needs more attention -// see IVGCVSW-1849 -ARMNN_AUTO_TEST_CASE(DivisionUint8, DivisionUint8Test) -ARMNN_AUTO_TEST_CASE(DivisionUint8Broadcast1Element, DivisionBroadcast1ElementUint8Test) -ARMNN_AUTO_TEST_CASE(DivisionUint8Broadcast1DVector, DivisionBroadcast1DVectorUint8Test) - -// Mul -ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest) -ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1Element, MultiplicationBroadcast1ElementTest) -ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVector, MultiplicationBroadcast1DVectorTest) -ARMNN_AUTO_TEST_CASE(MultiplicationUint8, MultiplicationUint8Test) -ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1ElementUint8, MultiplicationBroadcast1ElementUint8Test) -ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVectorUint8, MultiplicationBroadcast1DVectorUint8Test) - -// Batch Norm -ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest) -ARMNN_AUTO_TEST_CASE(BatchNormUint8, BatchNormUint8Test) - -// Resize Bilinear -ARMNN_AUTO_TEST_CASE(SimpleResizeBilinear, SimpleResizeBilinearTest) -ARMNN_AUTO_TEST_CASE(SimpleResizeBilinearUint8, SimpleResizeBilinearUint8Test) -ARMNN_AUTO_TEST_CASE(ResizeBilinearNop, ResizeBilinearNopTest) -ARMNN_AUTO_TEST_CASE(ResizeBilinearNopUint8, ResizeBilinearNopUint8Test) -ARMNN_AUTO_TEST_CASE(ResizeBilinearSqMin, ResizeBilinearSqMinTest) -ARMNN_AUTO_TEST_CASE(ResizeBilinearSqMinUint8, ResizeBilinearSqMinUint8Test) -ARMNN_AUTO_TEST_CASE(ResizeBilinearMin, ResizeBilinearMinTest) -ARMNN_AUTO_TEST_CASE(ResizeBilinearMinUint8, ResizeBilinearMinUint8Test) -ARMNN_AUTO_TEST_CASE(ResizeBilinearMag, ResizeBilinearMagTest) -ARMNN_AUTO_TEST_CASE(ResizeBilinearMagUint8, ResizeBilinearMagUint8Test) - -// Fake Quantization -ARMNN_AUTO_TEST_CASE(FakeQuantization, FakeQuantizationTest) - -// L2 Noramlization -ARMNN_AUTO_TEST_CASE(L2Normalization1d, L2Normalization1dTest) -ARMNN_AUTO_TEST_CASE(L2Normalization2d, L2Normalization2dTest) -ARMNN_AUTO_TEST_CASE(L2Normalization3d, L2Normalization3dTest) -ARMNN_AUTO_TEST_CASE(L2Normalization4d, L2Normalization4dTest) - -// Constant 
-ARMNN_AUTO_TEST_CASE(Constant, ConstantTest) -ARMNN_AUTO_TEST_CASE(ConstantUint8, ConstantUint8Test) - -// Concat -ARMNN_AUTO_TEST_CASE(Concatenation1d, Concatenation1dTest) -ARMNN_AUTO_TEST_CASE(Concatenation1dUint8, Concatenation1dUint8Test) - -ARMNN_AUTO_TEST_CASE(Concatenation2dDim0, Concatenation2dDim0Test) -ARMNN_AUTO_TEST_CASE(Concatenation2dDim0Uint8, Concatenation2dDim0Uint8Test) -ARMNN_AUTO_TEST_CASE(Concatenation2dDim1, Concatenation2dDim1Test) -ARMNN_AUTO_TEST_CASE(Concatenation2dDim1Uint8, Concatenation2dDim1Uint8Test) - -ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDims, Concatenation2dDim0DiffInputDimsTest) -ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDimsUint8, Concatenation2dDim0DiffInputDimsUint8Test) -ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDims, Concatenation2dDim1DiffInputDimsTest) -ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDimsUint8, Concatenation2dDim1DiffInputDimsUint8Test) - -ARMNN_AUTO_TEST_CASE(Concatenation3dDim0, Concatenation3dDim0Test) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim0Uint8, Concatenation3dDim0Uint8Test) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim1, Concatenation3dDim1Test) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim1Uint8, Concatenation3dDim1Uint8Test) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim2, Concatenation3dDim2Test) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim2Uint8, Concatenation3dDim2Uint8Test) - -ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDims, Concatenation3dDim0DiffInputDimsTest) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDimsUint8, Concatenation3dDim0DiffInputDimsUint8Test) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDims, Concatenation3dDim1DiffInputDimsTest) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDimsUint8, Concatenation3dDim1DiffInputDimsUint8Test) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDims, Concatenation3dDim2DiffInputDimsTest) -ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDimsUint8, Concatenation3dDim2DiffInputDimsUint8Test) - -// Floor -ARMNN_AUTO_TEST_CASE(SimpleFloor, SimpleFloorTest) - -// Reshape -ARMNN_AUTO_TEST_CASE(SimpleReshapeFloat32, SimpleReshapeFloat32Test) -ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeUint8Test) - -// Permute -ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test) -ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, SimplePermuteUint8Test) -ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet1, PermuteFloat32ValueSet1Test) -ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet2, PermuteFloat32ValueSet2Test) -ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet3, PermuteFloat32ValueSet3Test) - -// Convert from Float16 to Float32 -ARMNN_AUTO_TEST_CASE(SimpleConvertFp16ToFp32, SimpleConvertFp16ToFp32Test) -// Convert from Float32 to Float16 -ARMNN_AUTO_TEST_CASE(SimpleConvertFp32ToFp16, SimpleConvertFp32ToFp16Test) - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/ReshapeTestImpl.hpp b/src/armnn/backends/test/ReshapeTestImpl.hpp deleted file mode 100644 index 5d32d9d3a6..0000000000 --- a/src/armnn/backends/test/ReshapeTestImpl.hpp +++ /dev/null @@ -1,177 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#pragma once - -#include <armnn/ArmNN.hpp> -#include <armnn/Tensor.hpp> -#include <armnn/TypesUtils.hpp> -#include <backends/WorkloadInfo.hpp> - -#include "test/TensorHelpers.hpp" -#include "QuantizeHelper.hpp" - -#include "backends/CpuTensorHandle.hpp" -#include "backends/WorkloadFactory.hpp" - -template<typename T> -LayerTestResult<T, 4> SimpleReshapeTestImpl( - armnn::IWorkloadFactory& workloadFactory, - armnn::TensorInfo inputTensorInfo, - armnn::TensorInfo outputTensorInfo, - const std::vector<T>& inputData, - const std::vector<T>& outputExpectedData) -{ - auto input = MakeTensor<T, 4>(inputTensorInfo, inputData); - - LayerTestResult<T, 4> ret(outputTensorInfo); - ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputExpectedData); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::ReshapeQueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateReshape(data, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - - return ret; -} - -LayerTestResult<float, 4> SimpleReshapeFloat32Test(armnn::IWorkloadFactory& workloadFactory) -{ - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - - unsigned int inputShape[] = { 2, 2, 3, 3 }; - unsigned int outputShape[] = { 2, 2, 9, 1 }; - - inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); - outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); - - std::vector<float> input = std::vector<float>( - { - 0.0f, 1.0f, 2.0f, - 3.0f, 4.0f, 5.0f, - 6.0f, 7.0f, 8.0f, - - 9.0f, 10.0f, 11.0f, - 12.0f, 13.0f, 14.0f, - 15.0f, 16.0f, 17.0f, - - 18.0f, 19.0f, 20.0f, - 21.0f, 22.0f, 23.0f, - 24.0f, 25.0f, 26.0f, - - 27.0f, 28.0f, 29.0f, - 30.0f, 31.0f, 32.0f, - 33.0f, 34.0f, 35.0f, - }); - - std::vector<float> outputExpected = std::vector<float>( - { - 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, - - 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, - - 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, - - 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, - }); - - return SimpleReshapeTestImpl<float>(workloadFactory, inputTensorInfo, outputTensorInfo, input, outputExpected); -} - -LayerTestResult<float, 4> SimpleFloorTest(armnn::IWorkloadFactory& workloadFactory) -{ - const armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32); - const armnn::TensorInfo outputTensorInfo(inputTensorInfo); - - auto input = MakeTensor<float, 4>(inputTensorInfo, - { -37.5f, -15.2f, -8.76f, -2.0f, -1.5f, -1.3f, -0.5f, -0.4f, 0.0f, - 1.0f, 0.4f, 0.5f, 1.3f, 1.5f, 2.0f, 8.76f, 15.2f, 37.5f }); - - LayerTestResult<float, 4> ret(outputTensorInfo); - ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, - { -38.0f, -16.0f, -9.0f, -2.0f, -2.0f, -2.0f, -1.0f, -1.0f, 0.0f, - 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 2.0f, 8.0f, 15.0f, 37.0f }); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - 
std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::FloorQueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateFloor(data, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - - return ret; -} - -LayerTestResult<uint8_t, 4> SimpleReshapeUint8Test(armnn::IWorkloadFactory& workloadFactory) -{ - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - - unsigned int inputShape[] = { 2, 2, 3, 3 }; - unsigned int outputShape[] = { 2, 2, 9, 1 }; - - inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::QuantisedAsymm8); - inputTensorInfo.SetQuantizationScale(1.0f); - outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::QuantisedAsymm8); - outputTensorInfo.SetQuantizationScale(1.0f); - - std::vector<uint8_t> input = std::vector<uint8_t>( - { - 0, 1, 2, - 3, 4, 5, - 6, 7, 8, - - 9, 10, 11, - 12, 13, 14, - 15, 16, 17, - - 18, 19, 20, - 21, 22, 23, - 24, 25, 26, - - 27, 28, 29, - 30, 31, 32, - 33, 34, 35, - }); - - std::vector<uint8_t> outputExpected = std::vector<uint8_t>( - { - 0, 1, 2, 3, 4, 5, 6, 7, 8, - - 9, 10, 11, 12, 13, 14, 15, 16, 17, - - 18, 19, 20, 21, 22, 23, 24, 25, 26, - - 27, 28, 29, 30, 31, 32, 33, 34, 35, - }); - - return SimpleReshapeTestImpl<uint8_t>(workloadFactory, inputTensorInfo, outputTensorInfo, input, outputExpected); -} diff --git a/src/armnn/backends/test/SoftmaxTestImpl.hpp b/src/armnn/backends/test/SoftmaxTestImpl.hpp deleted file mode 100644 index 5bc13fa21c..0000000000 --- a/src/armnn/backends/test/SoftmaxTestImpl.hpp +++ /dev/null @@ -1,153 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include <armnn/ArmNN.hpp> -#include <armnn/Tensor.hpp> -#include <armnn/TypesUtils.hpp> -#include <backends/WorkloadInfo.hpp> - -#include "test/TensorHelpers.hpp" -#include "QuantizeHelper.hpp" - -#include "backends/CpuTensorHandle.hpp" -#include "backends/WorkloadFactory.hpp" - -#include <algorithm> - -template<typename T> -LayerTestResult<T, 2> SimpleSoftmaxTestImpl(armnn::IWorkloadFactory& workloadFactory, float beta) -{ - using std::exp; - - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - - unsigned int inputShape[] = { 2, 4 }; - - inputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::GetDataType<T>()); - float qScale = 1.f / 256.f; - int qOffset = 0; - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - - outputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::GetDataType<T>()); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - - LayerTestResult<T, 2> ret(outputTensorInfo); - - // Each row is independently softmax'd. 
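// Editor's note (annotation, not part of the original diff): the hand-computed reference
// values below use the numerically stable form of softmax, subtracting the row maximum
// before exponentiating. This does not change the result, because
//
//     exp(beta * (x_i - max)) / sum_j exp(beta * (x_j - max))
//         == exp(beta * x_i) / sum_j exp(beta * x_j)
//
// For the first row the maximum is 1.0f and for the second row it is 0.5f, which is why the
// x0[] and x1[] arrays below subtract 1.0f and 0.5f respectively.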
- auto input = MakeTensor<T, 2>(inputTensorInfo, std::vector<T>( - QuantizedVector<T>(qScale, 0, { - 0.f, 1.f, 0.f, 0.f, - .5f, 0.f, 0.f, 0.f, - }))); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::SoftmaxQueueDescriptor data; - data.m_Parameters.m_Beta = beta; - - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSoftmax(data, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - CopyDataToITensorHandle(inputHandle.get(), &input[0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); - - float x0[4] = { exp((0.f - 1.0f) * beta), exp((1.0f - 1.0f) * beta), - exp((0.0f - 1.0f) * beta), exp((0.0f - 1.0f) * beta) }; - float sum0 = x0[0] + x0[1] + x0[2] + x0[3]; - float x1[4] = { exp((0.5f - 0.5f) * beta), exp((0.0f - 0.5f) * beta), - exp((0.0f - 0.5f) * beta), exp((0.0f - 0.5f) * beta) }; - float sum1 = x1[0] + x1[1] + x1[2] + x1[3]; - - ret.outputExpected = MakeTensor<T, 2>(outputTensorInfo, std::vector<T>( - QuantizedVector<T>(qScale, qOffset, { - x0[0] / sum0, x0[1] / sum0, x0[2] / sum0, x0[3] / sum0, - x1[0] / sum1, x1[1] / sum1, x1[2] / sum1, x1[3] / sum1 - }))); - - return ret; -} - -template<typename T> -LayerTestResult<T, 2> CompareSoftmaxTestImpl(armnn::IWorkloadFactory& workloadFactory, - armnn::IWorkloadFactory& refWorkloadFactory, - float beta) -{ - - const int batchSize = 20; - const int channels = 30; - - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - - unsigned int inputShape[] = { batchSize, channels }; - - inputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::GetDataType<T>()); - outputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::GetDataType<T>()); - float qScale = 1.f / 256.f; - int qOffset = 0; - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - - - LayerTestResult<T, 2> ret(outputTensorInfo); - auto input = MakeRandomTensor<T, 2>(inputTensorInfo, 0xF00D, 0.0f, 1.0f); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - armnn::SoftmaxQueueDescriptor data; - data.m_Parameters.m_Beta = beta; - - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - - std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); - std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); - - - armnn::SoftmaxQueueDescriptor refData = data; - armnn::WorkloadInfo refInfo = info; - SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); - SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSoftmax(data, info); - std::unique_ptr<armnn::IWorkload> workloadRef = 
refWorkloadFactory.CreateSoftmax(refData, refInfo); - - outputHandleRef->Allocate(); - inputHandleRef->Allocate(); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0]); - CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0]); - - workloadFactory.Finalize(); - workload->Execute(); - refWorkloadFactory.Finalize(); - workloadRef->Execute(); - - CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); - CopyDataFromITensorHandle(&ret.outputExpected[0][0], outputHandleRef.get()); - - return ret; -} diff --git a/src/armnn/backends/test/SplitterTestImpl.hpp b/src/armnn/backends/test/SplitterTestImpl.hpp deleted file mode 100644 index 5dcc412d0e..0000000000 --- a/src/armnn/backends/test/SplitterTestImpl.hpp +++ /dev/null @@ -1,307 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include <armnn/ArmNN.hpp> -#include <armnn/Tensor.hpp> -#include <backends/WorkloadInfo.hpp> - -#include "test/TensorHelpers.hpp" - -#include "backends/CpuTensorHandle.hpp" -#include "backends/WorkloadFactory.hpp" - -#include "backends/test/QuantizeHelper.hpp" - - -template<typename T> -std::vector<LayerTestResult<T,3>> SplitterTestCommon(armnn::IWorkloadFactory& workloadFactory, - float qScale = 0.0f, - int32_t qOffset = 0) -{ - unsigned int inputWidth = 5; - unsigned int inputHeight = 6; - unsigned int inputChannels = 3; - - // NOTE: Compute Library imposes a restriction that the x and y dimension (input height and width) - // cannot be split. - // For the reasons for this, see first comment on https://jira.arm.com/browse/IVGCVSW-1239 - // - // This test has therefore been recast to split the channels, then split the resulting subtensor. - - // To take channel 0 of original output - // and channel 0 and channel 1 of the split subtensor. - unsigned int outputWidth1 = inputWidth; - unsigned int outputHeight1 = inputHeight; - unsigned int outputChannels1 = 1; - - // To take channel 1 and 2 of the original output. - unsigned int outputWidth2 = inputWidth; - unsigned int outputHeight2 = inputHeight; - unsigned int outputChannels2 = 2; - - - // Define the tensor descriptors. - armnn::TensorInfo inputTensorInfo({ inputChannels, inputHeight, inputWidth }, armnn::GetDataType<T>()); - - // Outputs of the original split. - armnn::TensorInfo outputTensorInfo1({ outputChannels1, outputHeight1, outputWidth1 }, armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo2({ outputChannels2, outputHeight2, outputWidth2 }, armnn::GetDataType<T>()); - - // Outputs of the subsequent subtensor split. - armnn::TensorInfo outputTensorInfo3({ outputChannels1, outputHeight1, outputWidth1 }, armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo4({ outputChannels1, outputHeight1, outputWidth1 }, armnn::GetDataType<T>()); - - // Set quantization parameters if the requested type is a quantized type. - // The quantization doesn't really matter as the splitter operator doesn't dequantize/quantize. 
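// Editor's note (annotation, not part of the original diff): the view origins used further
// below can be read as (channel, y, x) offsets into the tensor being split. A sketch of the
// two-stage split this test performs, under that reading:
//
//     input   {3, 6, 5}  --split-->  view at {0,0,0} -> output1 {1, 6, 5}   (channel 0)
//                                    view at {1,0,0} -> output2 {2, 6, 5}   (channels 1-2)
//     output2 {2, 6, 5}  --split-->  view at {0,0,0} -> output3 {1, 6, 5}   (channel 1)
//                                    view at {1,0,0} -> output4 {1, 6, 5}   (channel 2)
//
// The y and x components stay zero because, as noted above, the x and y dimensions cannot
// be split.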
- if(armnn::IsQuantizedType<T>()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo1.SetQuantizationScale(qScale); - outputTensorInfo1.SetQuantizationOffset(qOffset); - outputTensorInfo2.SetQuantizationScale(qScale); - outputTensorInfo2.SetQuantizationOffset(qOffset); - outputTensorInfo3.SetQuantizationScale(qScale); - outputTensorInfo3.SetQuantizationOffset(qOffset); - outputTensorInfo4.SetQuantizationScale(qScale); - outputTensorInfo4.SetQuantizationOffset(qOffset); - } - - LayerTestResult<T,3> ret1(outputTensorInfo1); - LayerTestResult<T,3> ret2(outputTensorInfo2); - LayerTestResult<T,3> ret3(outputTensorInfo3); - LayerTestResult<T,3> ret4(outputTensorInfo4); - - auto input = MakeTensor<T, 3>(inputTensorInfo, std::vector<T>( - QuantizedVector<T>(qScale, qOffset, { - 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, - 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, - 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, - 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, - 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, - 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, - - 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, - 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, - 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, - 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, - 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, - 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, - - 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, - 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, - 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, - 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, - 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, - 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, - }) - )); - - // Channel 0 of the original input. - ret1.outputExpected = MakeTensor<T, 3>(outputTensorInfo1, std::vector<T>( - QuantizedVector<T>(qScale, qOffset, { - 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, - 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, - 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, - 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, - 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, - 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, - }) - )); - - // Channel 1 & 2 of the original input. - ret2.outputExpected = MakeTensor<T, 3>(outputTensorInfo2, std::vector<T>( - QuantizedVector<T>(qScale, qOffset, { - 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, - 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, - 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, - 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, - 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, - 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, - - 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, - 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, - 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, - 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, - 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, - 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, - }) - )); - - // Channel 0 of return 2 (i.e. channels 1 and 2 of the original input). - ret3.outputExpected = MakeTensor<T, 3>(outputTensorInfo3, std::vector<T>( - QuantizedVector<T>(qScale, qOffset, { - 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, - 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, - 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, - 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, - 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, - 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, - }) - )); - - // Channel 1 of return 2. 
- ret4.outputExpected = MakeTensor<T, 3>(outputTensorInfo4, std::vector<T>( - QuantizedVector<T>(qScale, qOffset, { - 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, - 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, - 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, - 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, - 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, - 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, - }) - )); - - // NOTE: as a corollary of the splitting of x and y restriction the x and y values of the view origins - // have to be zero, the co-ordinates are as per the tensor info above channels, height/y, width/x - // note that under the hood the compute engine reverses these i.e. its coordinate system is x, y, channels. - std::vector<unsigned int> wOrigin1 = {0, 0, 0}; //Extent of the window is defined by size of output[0]. - armnn::SplitterQueueDescriptor::ViewOrigin window1(wOrigin1); - - std::vector<unsigned int> wOrigin2 = {1, 0, 0}; //Extent of the window is defined by size of output[1]. - armnn::SplitterQueueDescriptor::ViewOrigin window2(wOrigin2); - - std::vector<unsigned int> wOrigin3 = {0, 0, 0}; //Extent of the window is defined by size of output[2]. - armnn::SplitterQueueDescriptor::ViewOrigin window3(wOrigin3); - - std::vector<unsigned int> wOrigin4 = {1, 0, 0}; //Extent of the window is defined by size of output[3]. - armnn::SplitterQueueDescriptor::ViewOrigin window4(wOrigin4); - - bool subTensorsSupported = workloadFactory.SupportsSubTensors(); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - - std::unique_ptr<armnn::ITensorHandle> outputHandle1 = - subTensorsSupported ? - workloadFactory.CreateSubTensorHandle(*inputHandle, outputTensorInfo1.GetShape(), wOrigin1.data()) : - workloadFactory.CreateTensorHandle(outputTensorInfo1); - - std::unique_ptr<armnn::ITensorHandle> outputHandle2 = - subTensorsSupported ? - workloadFactory.CreateSubTensorHandle(*inputHandle, outputTensorInfo2.GetShape(), wOrigin2.data()) : - workloadFactory.CreateTensorHandle(outputTensorInfo2); - - std::unique_ptr<armnn::ITensorHandle> outputHandle3 = - subTensorsSupported ? - workloadFactory.CreateSubTensorHandle(*outputHandle2, outputTensorInfo3.GetShape(), wOrigin3.data()) : - workloadFactory.CreateTensorHandle(outputTensorInfo3); - - std::unique_ptr<armnn::ITensorHandle> outputHandle4 = - subTensorsSupported ? - workloadFactory.CreateSubTensorHandle(*outputHandle2, outputTensorInfo4.GetShape(), wOrigin4.data()) : - workloadFactory.CreateTensorHandle(outputTensorInfo4); - - // Do the first split - armnn::SplitterQueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo1, outputHandle1.get()); - AddOutputToWorkload(data, info, outputTensorInfo2, outputHandle2.get()); - - data.m_ViewOrigins.push_back(window1); - data.m_ViewOrigins.push_back(window2); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSplitter(data, info); - - inputHandle->Allocate(); - outputHandle1->Allocate(); - outputHandle2->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0]); - - workload->Execute(); - - CopyDataFromITensorHandle(&ret1.output[0][0][0], outputHandle1.get()); - CopyDataFromITensorHandle(&ret2.output[0][0][0], outputHandle2.get()); - -// // Do the second split. 
- armnn::SplitterQueueDescriptor data2; - armnn::WorkloadInfo info2; - AddInputToWorkload(data2, info2, outputTensorInfo2, outputHandle2.get()); - AddOutputToWorkload(data2, info2, outputTensorInfo3, outputHandle3.get()); - AddOutputToWorkload(data2, info2, outputTensorInfo4, outputHandle4.get()); - - data2.m_ViewOrigins.push_back(window3); - data2.m_ViewOrigins.push_back(window4); - - std::unique_ptr<armnn::IWorkload> workload2 = workloadFactory.CreateSplitter(data2, info2); - - outputHandle3->Allocate(); - outputHandle4->Allocate(); - - workload2->Execute(); - - CopyDataFromITensorHandle(&ret3.output[0][0][0], outputHandle3.get()); - CopyDataFromITensorHandle(&ret4.output[0][0][0], outputHandle4.get()); - - std::vector<LayerTestResult<T,3>> ret = {ret1, ret2, ret3, ret4,}; - - return ret; -} - - -template <typename T> -LayerTestResult<T, 3> CopyViaSplitterTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, int32_t qOffset) -{ - const armnn::TensorInfo tensorInfo({ 3, 6, 5 }, armnn::GetDataType<T>()); - auto input = MakeTensor<T, 3>(tensorInfo, QuantizedVector<T>(qScale, qOffset, - { - 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, - 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, - 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, - 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, - 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, - 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, - - 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, - 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, - 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, - 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, - 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, - 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, - - 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, - 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, - 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, - 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, - 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, - 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, - })); - - std::vector<unsigned int> origin = { 0, 0, 0 }; - armnn::SplitterQueueDescriptor::ViewOrigin window(origin); - - const bool subTensorsSupported = workloadFactory.SupportsSubTensors(); - - std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(tensorInfo); - - std::unique_ptr<armnn::ITensorHandle> outputHandle = - subTensorsSupported ? - workloadFactory.CreateSubTensorHandle(*inputHandle, tensorInfo.GetShape(), origin.data()) : - workloadFactory.CreateTensorHandle(tensorInfo); - - armnn::SplitterQueueDescriptor data; - armnn::WorkloadInfo info; - AddInputToWorkload(data, info, tensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, tensorInfo, outputHandle.get()); - - data.m_ViewOrigins.push_back(window); - - std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSplitter(data, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0]); - - workload->Execute(); - - LayerTestResult<T, 3> ret(tensorInfo); - CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get()); - ret.outputExpected = input; - - return ret; -} diff --git a/src/armnn/backends/test/TensorCopyUtils.cpp b/src/armnn/backends/test/TensorCopyUtils.cpp deleted file mode 100644 index dc5864b285..0000000000 --- a/src/armnn/backends/test/TensorCopyUtils.cpp +++ /dev/null @@ -1,159 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include <algorithm> -#include <cstring> -#include <boost/cast.hpp> -#include <Half.hpp> - -#include "TensorCopyUtils.hpp" - -#ifdef ARMCOMPUTECL_ENABLED -#include "backends/ClTensorHandle.hpp" -#endif - -#if ARMCOMPUTENEON_ENABLED -#include "backends/NeonTensorHandle.hpp" -#endif - -#if ARMCOMPUTECLENABLED || ARMCOMPUTENEON_ENABLED -#include "backends/ArmComputeTensorUtils.hpp" -#endif - -#include "backends/CpuTensorHandle.hpp" - -void CopyDataToITensorHandle(armnn::ITensorHandle* tensorHandle, const void* mem) -{ - switch (tensorHandle->GetType()) - { - case armnn::ITensorHandle::Cpu: - { - auto handle = boost::polymorphic_downcast<armnn::ScopedCpuTensorHandle*>(tensorHandle); - memcpy(handle->GetTensor<void>(), mem, handle->GetTensorInfo().GetNumBytes()); - break; - } -#ifdef ARMCOMPUTECL_ENABLED - case armnn::ITensorHandle::CL: - { - using armnn::armcomputetensorutils::CopyArmComputeITensorData; - auto handle = boost::polymorphic_downcast<armnn::IClTensorHandle*>(tensorHandle); - handle->Map(true); - switch(handle->GetDataType()) - { - case arm_compute::DataType::F32: - CopyArmComputeITensorData(static_cast<const float*>(mem), handle->GetTensor()); - break; - case arm_compute::DataType::QASYMM8: - CopyArmComputeITensorData(static_cast<const uint8_t*>(mem), handle->GetTensor()); - break; - case arm_compute::DataType::F16: - CopyArmComputeITensorData(static_cast<const armnn::Half*>(mem), handle->GetTensor()); - break; - default: - { - throw armnn::UnimplementedException(); - } - } - handle->Unmap(); - break; - } -#endif -#if ARMCOMPUTENEON_ENABLED - case armnn::ITensorHandle::Neon: - { - using armnn::armcomputetensorutils::CopyArmComputeITensorData; - auto handle = boost::polymorphic_downcast<armnn::INeonTensorHandle*>(tensorHandle); - switch (handle->GetDataType()) - { - case arm_compute::DataType::F32: - CopyArmComputeITensorData(static_cast<const float*>(mem), handle->GetTensor()); - break; - case arm_compute::DataType::QASYMM8: - CopyArmComputeITensorData(static_cast<const uint8_t*>(mem), handle->GetTensor()); - break; - default: - { - throw armnn::UnimplementedException(); - } - } - break; - } -#endif - default: - { - throw armnn::UnimplementedException(); - } - } -} - -void CopyDataFromITensorHandle(void* mem, const armnn::ITensorHandle* tensorHandle) -{ - switch (tensorHandle->GetType()) - { - case armnn::ITensorHandle::Cpu: - { - auto handle = boost::polymorphic_downcast<const armnn::ScopedCpuTensorHandle*>(tensorHandle); - memcpy(mem, handle->GetTensor<void>(), handle->GetTensorInfo().GetNumBytes()); - break; - } -#ifdef ARMCOMPUTECL_ENABLED - case armnn::ITensorHandle::CL: - { - using armnn::armcomputetensorutils::CopyArmComputeITensorData; - auto handle = boost::polymorphic_downcast<const armnn::IClTensorHandle*>(tensorHandle); - const_cast<armnn::IClTensorHandle*>(handle)->Map(true); - switch(handle->GetDataType()) - { - case arm_compute::DataType::F32: - CopyArmComputeITensorData(handle->GetTensor(), static_cast<float*>(mem)); - break; - case arm_compute::DataType::QASYMM8: - CopyArmComputeITensorData(handle->GetTensor(), static_cast<uint8_t*>(mem)); - break; - case arm_compute::DataType::F16: - CopyArmComputeITensorData(handle->GetTensor(), static_cast<armnn::Half*>(mem)); - break; - default: - { - throw armnn::UnimplementedException(); - } - } - const_cast<armnn::IClTensorHandle*>(handle)->Unmap(); - break; - } -#endif -#if ARMCOMPUTENEON_ENABLED - case armnn::ITensorHandle::Neon: - { - using 
armnn::armcomputetensorutils::CopyArmComputeITensorData; - auto handle = boost::polymorphic_downcast<const armnn::INeonTensorHandle*>(tensorHandle); - switch (handle->GetDataType()) - { - case arm_compute::DataType::F32: - CopyArmComputeITensorData(handle->GetTensor(), static_cast<float*>(mem)); - break; - case arm_compute::DataType::QASYMM8: - CopyArmComputeITensorData(handle->GetTensor(), static_cast<uint8_t*>(mem)); - break; - default: - { - throw armnn::UnimplementedException(); - } - } - break; - } -#endif - default: - { - throw armnn::UnimplementedException(); - } - } -} - -void AllocateAndCopyDataToITensorHandle(armnn::ITensorHandle* tensorHandle, const void* mem) -{ - tensorHandle->Allocate(); - CopyDataToITensorHandle(tensorHandle, mem); -} diff --git a/src/armnn/backends/test/TensorCopyUtils.hpp b/src/armnn/backends/test/TensorCopyUtils.hpp deleted file mode 100644 index 0cec839903..0000000000 --- a/src/armnn/backends/test/TensorCopyUtils.hpp +++ /dev/null @@ -1,14 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include "armnn/Tensor.hpp" -#include "backends/ITensorHandle.hpp" - -void CopyDataToITensorHandle(armnn::ITensorHandle* tensorHandle, const void* mem); - -void CopyDataFromITensorHandle(void* mem, const armnn::ITensorHandle* tensorHandle); - -void AllocateAndCopyDataToITensorHandle(armnn::ITensorHandle* tensorHandle, const void* mem);
\ No newline at end of file diff --git a/src/armnn/backends/test/WorkloadDataValidation.cpp b/src/armnn/backends/test/WorkloadDataValidation.cpp deleted file mode 100644 index a5cfbd1270..0000000000 --- a/src/armnn/backends/test/WorkloadDataValidation.cpp +++ /dev/null @@ -1,471 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#include <boost/test/unit_test.hpp> -#include <backends/CpuTensorHandle.hpp> -#include <backends/Workload.hpp> -#include <backends/RefWorkloads.hpp> -#include <backends/RefWorkloadFactory.hpp> - -#include <armnn/Exceptions.hpp> - -#include "WorkloadTestUtils.hpp" - -using namespace armnn; - -BOOST_AUTO_TEST_SUITE(WorkloadInfoValidation) - - - -BOOST_AUTO_TEST_CASE(QueueDescriptor_Validate_WrongNumOfInputsOutputs) -{ - InputQueueDescriptor invalidData; - WorkloadInfo invalidInfo; - //Invalid argument exception is expected, because no inputs and no outputs were defined. - BOOST_CHECK_THROW(RefWorkloadFactory().CreateInput(invalidData, invalidInfo), armnn::InvalidArgumentException); -} - -BOOST_AUTO_TEST_CASE(RefPooling2dFloat32Workload_Validate_WrongDimTensor) -{ - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - - unsigned int inputShape[] = {2, 3, 4}; // <- Invalid - input tensor has to be 4D. - unsigned int outputShape[] = {2, 3, 4, 5}; - - outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); - inputTensorInfo = armnn::TensorInfo(3, inputShape, armnn::DataType::Float32); - - Pooling2dQueueDescriptor invalidData; - WorkloadInfo invalidInfo; - - AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); - AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); - - // Invalid argument exception is expected, input tensor has to be 4D. - BOOST_CHECK_THROW(RefPooling2dFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); -} - -BOOST_AUTO_TEST_CASE(SoftmaxQueueDescriptor_Validate_WrongInputHeight) -{ - unsigned int inputHeight = 1; - unsigned int inputWidth = 1; - unsigned int inputChannels = 4; - unsigned int inputNum = 2; - - unsigned int outputChannels = inputChannels; - unsigned int outputHeight = inputHeight + 1; //Makes data invalid - Softmax expects height and width to be 1. - unsigned int outputWidth = inputWidth; - unsigned int outputNum = inputNum; - - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - - unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; - unsigned int outputShape[] = { outputNum, outputChannels, outputHeight, outputWidth }; - - inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); - outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); - - SoftmaxQueueDescriptor invalidData; - WorkloadInfo invalidInfo; - - AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); - AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); - - //Invalid argument exception is expected, because height != 1. 
- BOOST_CHECK_THROW(RefSoftmaxFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); -} - -BOOST_AUTO_TEST_CASE(FullyConnectedQueueDescriptor_Validate_RequiredDataMissing) -{ - unsigned int inputWidth = 1; - unsigned int inputHeight = 1; - unsigned int inputChannels = 5; - unsigned int inputNum = 2; - - unsigned int outputWidth = 1; - unsigned int outputHeight = 1; - unsigned int outputChannels = 3; - unsigned int outputNum = 2; - - // Define the tensor descriptors. - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - armnn::TensorInfo weightsDesc; - armnn::TensorInfo biasesDesc; - - unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; - unsigned int outputShape[] = { outputNum, outputChannels, outputHeight, outputWidth }; - unsigned int weightsShape[] = { 1, 1, inputChannels, outputChannels }; - unsigned int biasShape[] = { 1, outputChannels, outputHeight, outputWidth }; - - inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); - outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); - weightsDesc = armnn::TensorInfo(4, weightsShape, armnn::DataType::Float32); - biasesDesc = armnn::TensorInfo(4, biasShape, armnn::DataType::Float32); - - FullyConnectedQueueDescriptor invalidData; - WorkloadInfo invalidInfo; - - ScopedCpuTensorHandle weightTensor(weightsDesc); - ScopedCpuTensorHandle biasTensor(biasesDesc); - - AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); - AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); - invalidData.m_Weight = &weightTensor; - invalidData.m_Bias = &biasTensor; - invalidData.m_Parameters.m_BiasEnabled = true; - invalidData.m_Parameters.m_TransposeWeightMatrix = false; - - - //Invalid argument exception is expected, because not all required fields have been provided. - //In particular inputsData[0], outputsData[0] and weightsData can not be null. - BOOST_CHECK_THROW(RefFullyConnectedFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); -} - - -BOOST_AUTO_TEST_CASE(NormalizationQueueDescriptor_Validate_WrongInputHeight) -{ - constexpr unsigned int inputNum = 5; - constexpr unsigned int inputHeight = 32; - constexpr unsigned int inputWidth = 24; - constexpr unsigned int inputChannels = 3; - - constexpr unsigned int outputNum = inputNum; - constexpr unsigned int outputChannels = inputChannels; - constexpr unsigned int outputHeight = inputHeight + 1; //Makes data invalid - normalization requires. - //Input and output to have the same dimensions. 
- constexpr unsigned int outputWidth = inputWidth; - - - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - - unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth}; - unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth}; - - inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); - outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); - - - armnn::NormalizationAlgorithmMethod normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness; - armnn::NormalizationAlgorithmChannel normChannel = armnn::NormalizationAlgorithmChannel::Across; - float alpha = 1.f; - float beta = 1.f; - float kappa = 1.f; - uint32_t normSize = 5; - - NormalizationQueueDescriptor invalidData; - WorkloadInfo invalidInfo; - - AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); - AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); - invalidData.m_Parameters.m_NormChannelType = normChannel; - invalidData.m_Parameters.m_NormMethodType = normMethod; - invalidData.m_Parameters.m_NormSize = normSize; - invalidData.m_Parameters.m_Alpha = alpha; - invalidData.m_Parameters.m_Beta = beta; - invalidData.m_Parameters.m_K = kappa; - - //Invalid argument exception is expected, because input height != output height. - BOOST_CHECK_THROW(RefNormalizationFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); -} - -BOOST_AUTO_TEST_CASE(SplitterQueueDescriptor_Validate_WrongWindow) -{ - constexpr unsigned int inputNum = 1; - constexpr unsigned int inputHeight = 32; - constexpr unsigned int inputWidth = 24; - constexpr unsigned int inputChannels = 3; - - constexpr unsigned int outputNum = inputNum; - constexpr unsigned int outputChannels = inputChannels; - constexpr unsigned int outputHeight = 18; - constexpr unsigned int outputWidth = inputWidth; - - - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - - unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth}; - unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth}; - - inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); - outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); - - SplitterQueueDescriptor invalidData; - WorkloadInfo invalidInfo; - - AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); - AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); - - // Invalid, since it has only 3 dimensions while the input tensor is 4d. - std::vector<unsigned int> wOrigin = {0, 0, 0}; - armnn::SplitterQueueDescriptor::ViewOrigin window(wOrigin); - invalidData.m_ViewOrigins.push_back(window); - - BOOST_TEST_INFO("Invalid argument exception is expected, because split window dimensionality does not " - "match input."); - BOOST_CHECK_THROW(RefSplitterFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); - - // Invalid, since window extends past the boundary of input tensor. 
- std::vector<unsigned int> wOrigin3 = {0, 0, 15, 0};
- armnn::SplitterQueueDescriptor::ViewOrigin window3(wOrigin3);
- invalidData.m_ViewOrigins[0] = window3;
- BOOST_TEST_INFO("Invalid argument exception is expected (wOrigin3[2] + outputHeight > inputHeight).");
- BOOST_CHECK_THROW(RefSplitterFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
-
-
- std::vector<unsigned int> wOrigin4 = {0, 0, 0, 0};
- armnn::SplitterQueueDescriptor::ViewOrigin window4(wOrigin4);
- invalidData.m_ViewOrigins[0] = window4;
-
- std::vector<unsigned int> wOrigin5 = {1, 16, 20, 2};
- armnn::SplitterQueueDescriptor::ViewOrigin window5(wOrigin5);
- invalidData.m_ViewOrigins.push_back(window5);
-
- BOOST_TEST_INFO("Invalid argument exception is expected due to the number of split windows not matching the number of outputs.");
- BOOST_CHECK_THROW(RefSplitterFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
-}
-
-
-BOOST_AUTO_TEST_CASE(MergerQueueDescriptor_Validate_WrongWindow)
-{
- constexpr unsigned int inputNum = 1;
- constexpr unsigned int inputChannels = 3;
- constexpr unsigned int inputHeight = 32;
- constexpr unsigned int inputWidth = 24;
-
- constexpr unsigned int outputNum = 1;
- constexpr unsigned int outputChannels = 3;
- constexpr unsigned int outputHeight = 32;
- constexpr unsigned int outputWidth = 24;
-
-
- armnn::TensorInfo inputTensorInfo;
- armnn::TensorInfo outputTensorInfo;
-
- unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth};
- unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
-
- inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32);
- outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32);
-
- MergerQueueDescriptor invalidData;
- WorkloadInfo invalidInfo;
-
- AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr);
- AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr);
-
- // Invalid, since it has only 3 dimensions while the input tensor is 4d.
- std::vector<unsigned int> wOrigin = {0, 0, 0};
- armnn::MergerQueueDescriptor::ViewOrigin window(wOrigin);
- invalidData.m_ViewOrigins.push_back(window);
-
- BOOST_TEST_INFO("Invalid argument exception is expected, because merge window dimensionality does not "
- "match input.");
- BOOST_CHECK_THROW(RefMergerFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
-
- // Invalid, since window extends past the boundary of output tensor.
- std::vector<unsigned int> wOrigin3 = {0, 0, 15, 0};
- armnn::MergerQueueDescriptor::ViewOrigin window3(wOrigin3);
- invalidData.m_ViewOrigins[0] = window3;
- BOOST_TEST_INFO("Invalid argument exception is expected (wOrigin3[2] + inputHeight > outputHeight).");
- BOOST_CHECK_THROW(RefMergerFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
-
-
- std::vector<unsigned int> wOrigin4 = {0, 0, 0, 0};
- armnn::MergerQueueDescriptor::ViewOrigin window4(wOrigin4);
- invalidData.m_ViewOrigins[0] = window4;
-
- std::vector<unsigned int> wOrigin5 = {1, 16, 20, 2};
- armnn::MergerQueueDescriptor::ViewOrigin window5(wOrigin5);
- invalidData.m_ViewOrigins.push_back(window5);
-
- BOOST_TEST_INFO("Invalid argument exception is expected due to the number of merge windows not matching the number of inputs.");
- BOOST_CHECK_THROW(RefMergerFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
-}
-
-BOOST_AUTO_TEST_CASE(AdditionQueueDescriptor_Validate_InputNumbers)
-{
- armnn::TensorInfo input1TensorInfo;
- armnn::TensorInfo input2TensorInfo;
- armnn::TensorInfo input3TensorInfo;
- armnn::TensorInfo outputTensorInfo;
-
- unsigned int shape[] = {1, 1, 1, 1};
-
- input1TensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
- input2TensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
- input3TensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
- outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
-
- AdditionQueueDescriptor invalidData;
- WorkloadInfo invalidInfo;
-
- AddInputToWorkload(invalidData, invalidInfo, input1TensorInfo, nullptr);
- AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr);
-
- // Too few inputs.
- BOOST_CHECK_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
-
- AddInputToWorkload(invalidData, invalidInfo, input2TensorInfo, nullptr);
-
- // Correct.
- BOOST_CHECK_NO_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo));
-
- AddInputToWorkload(invalidData, invalidInfo, input3TensorInfo, nullptr);
-
- // Too many inputs.
- BOOST_CHECK_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
-}
-
-BOOST_AUTO_TEST_CASE(AdditionQueueDescriptor_Validate_InputShapes)
-{
- armnn::TensorInfo input1TensorInfo;
- armnn::TensorInfo input2TensorInfo;
- armnn::TensorInfo outputTensorInfo;
-
- unsigned int shape1[] = {1, 1, 2, 1};
- unsigned int shape2[] = {1, 1, 3, 2};
-
- // Incompatible shapes even with broadcasting.
- {
- input1TensorInfo = armnn::TensorInfo(4, shape1, armnn::DataType::Float32);
- input2TensorInfo = armnn::TensorInfo(4, shape2, armnn::DataType::Float32);
- outputTensorInfo = armnn::TensorInfo(4, shape1, armnn::DataType::Float32);
-
- AdditionQueueDescriptor invalidData;
- WorkloadInfo invalidInfo;
-
- AddInputToWorkload(invalidData, invalidInfo, input1TensorInfo, nullptr);
- AddInputToWorkload(invalidData, invalidInfo, input2TensorInfo, nullptr);
- AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr);
-
- BOOST_CHECK_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
- }
-
- // Output size not compatible with input sizes.
- {
- input1TensorInfo = armnn::TensorInfo(4, shape1, armnn::DataType::Float32);
- input2TensorInfo = armnn::TensorInfo(4, shape1, armnn::DataType::Float32);
- outputTensorInfo = armnn::TensorInfo(4, shape2, armnn::DataType::Float32);
-
- AdditionQueueDescriptor invalidData;
- WorkloadInfo invalidInfo;
-
- AddInputToWorkload(invalidData, invalidInfo, input1TensorInfo, nullptr);
- AddInputToWorkload(invalidData, invalidInfo, input2TensorInfo, nullptr);
- AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr);
-
- // Output differs.
- BOOST_CHECK_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
- }
-}
-
-BOOST_AUTO_TEST_CASE(MultiplicationQueueDescriptor_Validate_InputTensorDimensionMismatch)
-{
- armnn::TensorInfo input0TensorInfo;
- armnn::TensorInfo input1TensorInfo;
- armnn::TensorInfo outputTensorInfo;
-
- constexpr unsigned int input0Shape[] = { 2, 2, 4, 4 };
- constexpr std::size_t dimensionCount = std::extent<decltype(input0Shape)>::value;
-
- // Checks dimension consistency for input tensors.
- for (unsigned int dimIndex = 0; dimIndex < dimensionCount; ++dimIndex)
- {
- unsigned int input1Shape[dimensionCount];
- for (unsigned int i = 0; i < dimensionCount; ++i)
- {
- input1Shape[i] = input0Shape[i];
- }
-
- ++input1Shape[dimIndex];
-
- input0TensorInfo = armnn::TensorInfo(dimensionCount, input0Shape, armnn::DataType::Float32);
- input1TensorInfo = armnn::TensorInfo(dimensionCount, input1Shape, armnn::DataType::Float32);
- outputTensorInfo = armnn::TensorInfo(dimensionCount, input0Shape, armnn::DataType::Float32);
-
- MultiplicationQueueDescriptor invalidData;
- WorkloadInfo invalidInfo;
-
- AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr);
- AddInputToWorkload(invalidData, invalidInfo, input0TensorInfo, nullptr);
- AddInputToWorkload(invalidData, invalidInfo, input1TensorInfo, nullptr);
-
- BOOST_CHECK_THROW(RefMultiplicationFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
- }
-
- // Checks dimension consistency for input and output tensors.
- for (unsigned int dimIndex = 0; dimIndex < dimensionCount; ++dimIndex)
- {
- unsigned int outputShape[dimensionCount];
- for (unsigned int i = 0; i < dimensionCount; ++i)
- {
- outputShape[i] = input0Shape[i];
- }
-
- ++outputShape[dimIndex];
-
- input0TensorInfo = armnn::TensorInfo(dimensionCount, input0Shape, armnn::DataType::Float32);
- input1TensorInfo = armnn::TensorInfo(dimensionCount, input0Shape, armnn::DataType::Float32);
- outputTensorInfo = armnn::TensorInfo(dimensionCount, outputShape, armnn::DataType::Float32);
-
- MultiplicationQueueDescriptor invalidData;
- WorkloadInfo invalidInfo;
-
- AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr);
- AddInputToWorkload(invalidData, invalidInfo, input0TensorInfo, nullptr);
- AddInputToWorkload(invalidData, invalidInfo, input1TensorInfo, nullptr);
-
- BOOST_CHECK_THROW(RefMultiplicationFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
- }
-}
-
-BOOST_AUTO_TEST_CASE(ReshapeQueueDescriptor_Validate_MismatchingNumElements)
-{
- armnn::TensorInfo inputTensorInfo;
- armnn::TensorInfo outputTensorInfo;
-
- // The input and output shapes should have the same number of elements, but these don't.
- unsigned int inputShape[] = { 1, 1, 2, 3 };
- unsigned int outputShape[] = { 1, 1, 1, 2 };
-
- inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32);
- outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32);
-
- ReshapeQueueDescriptor invalidData;
- WorkloadInfo invalidInfo;
-
- AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr);
- AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr);
-
- // InvalidArgumentException is expected, because the number of elements doesn't match.
- BOOST_CHECK_THROW(RefReshapeFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
-}
-
-
-BOOST_AUTO_TEST_CASE(LstmQueueDescriptor_Validate)
-{
- armnn::TensorInfo inputTensorInfo;
- armnn::TensorInfo outputTensorInfo;
-
- unsigned int inputShape[] = { 1, 2 };
- unsigned int outputShape[] = { 1 };
-
- inputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::DataType::Float32);
- outputTensorInfo = armnn::TensorInfo(1, outputShape, armnn::DataType::Float32);
-
- LstmQueueDescriptor invalidData;
- WorkloadInfo invalidInfo;
-
- AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr);
- AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr);
-
- BOOST_CHECK_THROW(invalidData.Validate(invalidInfo), armnn::InvalidArgumentException);
-}
-
-BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnn/backends/test/WorkloadTestUtils.hpp b/src/armnn/backends/test/WorkloadTestUtils.hpp
deleted file mode 100644
index a7b75309f7..0000000000
--- a/src/armnn/backends/test/WorkloadTestUtils.hpp
+++ /dev/null
@@ -1,55 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-#pragma once
-
-#include <armnn/Tensor.hpp>
-#include <backends/WorkloadInfo.hpp>
-
-namespace armnn
-{
-class ITensorHandle;
-}
-
-template <typename QueueDescriptor>
-void AddInputToWorkload(QueueDescriptor& descriptor,
- armnn::WorkloadInfo& info,
- const armnn::TensorInfo& tensorInfo,
- armnn::ITensorHandle* tensorHandle)
-{
- descriptor.m_Inputs.push_back(tensorHandle);
- info.m_InputTensorInfos.push_back(tensorInfo);
-}
-
-template <typename QueueDescriptor>
-void AddOutputToWorkload(QueueDescriptor& descriptor,
- armnn::WorkloadInfo& info,
- const armnn::TensorInfo& tensorInfo,
- armnn::ITensorHandle* tensorHandle)
-{
- descriptor.m_Outputs.push_back(tensorHandle);
- info.m_OutputTensorInfos.push_back(tensorInfo);
-}
-
-template <typename QueueDescriptor>
-void SetWorkloadInput(QueueDescriptor& descriptor,
- armnn::WorkloadInfo& info,
- unsigned int index,
- const armnn::TensorInfo& tensorInfo,
- armnn::ITensorHandle* tensorHandle)
-{
- descriptor.m_Inputs[index] = tensorHandle;
- info.m_InputTensorInfos[index] = tensorInfo;
-}
-
-template <typename QueueDescriptor>
-void SetWorkloadOutput(QueueDescriptor& descriptor,
- armnn::WorkloadInfo& info,
- unsigned int index,
- const armnn::TensorInfo& tensorInfo,
- armnn::ITensorHandle* tensorHandle)
-{
- descriptor.m_Outputs[index] = tensorHandle;
- info.m_OutputTensorInfos[index] = tensorInfo;
-}
\ No newline at end of file
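
For context, the sketch below is not part of the deleted sources. It shows how the WorkloadTestUtils.hpp helpers above are typically combined with a queue descriptor's Validate() call, which is the pattern the removed validation tests rely on. The include path for the queue descriptors and the function name are illustrative assumptions rather than code from this tree; the types and helper signatures are taken from the files shown in this diff.

// Illustrative only: pairs AddInputToWorkload/AddOutputToWorkload with
// QueueDescriptor::Validate(), mirroring the deleted tests above.
#include <armnn/Tensor.hpp>
#include <armnn/Exceptions.hpp>
#include <backends/WorkloadData.hpp>   // assumed location of ReshapeQueueDescriptor in this era of the tree
#include "WorkloadTestUtils.hpp"

bool ReshapeValidationRejectsMismatchedElementCounts()
{
    // 1*1*2*3 = 6 elements in, 1*1*1*2 = 2 elements out, so validation should reject this.
    unsigned int inputShape[]  = { 1, 1, 2, 3 };
    unsigned int outputShape[] = { 1, 1, 1, 2 };

    armnn::TensorInfo inputTensorInfo(4, inputShape, armnn::DataType::Float32);
    armnn::TensorInfo outputTensorInfo(4, outputShape, armnn::DataType::Float32);

    armnn::ReshapeQueueDescriptor descriptor;
    armnn::WorkloadInfo info;

    // The helpers push the tensor handle and the TensorInfo onto the descriptor
    // and the WorkloadInfo in step, so the two stay index-aligned.
    AddInputToWorkload(descriptor, info, inputTensorInfo, nullptr);
    AddOutputToWorkload(descriptor, info, outputTensorInfo, nullptr);

    try
    {
        descriptor.Validate(info);
        return false; // validation unexpectedly accepted mismatching element counts
    }
    catch (const armnn::InvalidArgumentException&)
    {
        return true;  // expected rejection
    }
}

The same pattern (populate the descriptor and WorkloadInfo in lock-step via the helpers, then assert that Validate() or the workload constructor throws armnn::InvalidArgumentException) underpins each of the test cases removed above.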