Diffstat (limited to 'src/backends/cl')
92 files changed, 5416 insertions, 0 deletions
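Before the patch body, a brief orientation: this change introduces three cooperating pieces — ClContextControl (OpenCL runtime bring-up), the ClLayerSupport query functions, and ClWorkloadFactory (tensor handles and workloads). The sketch below is not part of the patch; it shows one plausible way the pieces fit together, using only the interfaces visible in this diff. The tensor shape, the activation settings, and the main() wrapper are illustrative assumptions, and TensorInfo/ActivationDescriptor come from the existing armnn public headers rather than from this change.

// Illustrative usage sketch -- NOT part of this patch. It exercises only the
// interfaces introduced below; shapes and descriptor values are assumptions.
#include <backends/cl/ClContextControl.hpp>
#include <backends/cl/ClLayerSupport.hpp>
#include <backends/cl/ClWorkloadFactory.hpp>

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>

#include <iostream>
#include <memory>
#include <string>

int main()
{
    // Bring up the OpenCL runtime: no tuned parameters, profiling disabled.
    armnn::ClContextControl contextControl(nullptr, false);

    // Describe a hypothetical 1x16 float tensor using the core armnn types.
    unsigned int dims[] = { 1, 16 };
    armnn::TensorInfo input(2, dims, armnn::DataType::Float32);
    armnn::TensorInfo output(2, dims, armnn::DataType::Float32);

    // Query layer support before committing to the GpuAcc backend.
    armnn::ActivationDescriptor descriptor;
    descriptor.m_Function = armnn::ActivationFunction::ReLu;
    std::string reason;
    if (!armnn::IsActivationSupportedCl(input, output, descriptor, &reason))
    {
        std::cerr << "GpuAcc rejected the layer: " << reason << std::endl;
        return 1;
    }

    // Create a GPU tensor handle through the factory; the handle is registered
    // with the factory's inter-layer memory group. When the library is built
    // without CL support, the factory returns nullptr.
    armnn::ClWorkloadFactory factory;
    std::unique_ptr<armnn::ITensorHandle> handle = factory.CreateTensorHandle(input);
    return handle ? 0 : 1;
}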
diff --git a/src/backends/cl/CMakeLists.txt b/src/backends/cl/CMakeLists.txt
new file mode 100644
index 0000000000..80ca0acc08
--- /dev/null
+++ b/src/backends/cl/CMakeLists.txt
@@ -0,0 +1,22 @@
+#
+# Copyright © 2017 Arm Ltd. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+
+list(APPEND armnnClBackend_sources
+    ClContextControl.cpp
+    ClContextControl.hpp
+    ClLayerSupport.cpp
+    ClLayerSupport.hpp
+    ClWorkloadFactory.cpp
+    ClWorkloadFactory.hpp
+)
+
+if(ARMCOMPUTECL)
+    add_subdirectory(workloads test)
+endif()
+
+add_library(armnnClBackend STATIC ${armnnClBackend_sources})
+target_include_directories(armnnClBackend PRIVATE ${PROJECT_SOURCE_DIR}/src)
+target_include_directories(armnnClBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn)
+target_include_directories(armnnClBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils)
diff --git a/src/backends/cl/ClContextControl.cpp b/src/backends/cl/ClContextControl.cpp
new file mode 100644
index 0000000000..e8b21c942d
--- /dev/null
+++ b/src/backends/cl/ClContextControl.cpp
@@ -0,0 +1,235 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClContextControl.hpp"
+
+#include "armnn/Exceptions.hpp"
+
+#ifdef ARMCOMPUTECL_ENABLED
+#include <arm_compute/core/CL/CLKernelLibrary.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+#endif
+
+#include <boost/assert.hpp>
+#include <boost/format.hpp>
+#include <boost/log/trivial.hpp>
+#include <boost/polymorphic_cast.hpp>
+#include <boost/core/ignore_unused.hpp>
+
+#include "LeakChecking.hpp"
+
+namespace cl
+{
+class Context;
+class CommandQueue;
+class Device;
+}
+
+namespace armnn
+{
+
+ClContextControl::ClContextControl(IGpuAccTunedParameters* clTunedParameters,
+                                   bool profilingEnabled)
+    : m_clTunedParameters(boost::polymorphic_downcast<ClTunedParameters*>(clTunedParameters))
+    , m_ProfilingEnabled(profilingEnabled)
+{
+    // Marks m_ProfilingEnabled as used to avoid compilation problems when ArmCompute is disabled.
+    boost::ignore_unused(m_ProfilingEnabled);
+
+#ifdef ARMCOMPUTECL_ENABLED
+    try
+    {
+        std::vector<cl::Platform> platforms;
+        cl::Platform::get(&platforms);
+
+        // Selects the first platform as the default.
+        cl::Platform::setDefault(platforms[0]);
+
+        std::vector<cl::Device> devices;
+        platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices);
+
+        // Selects the first GPU device as the default.
+        cl::Device::setDefault(devices[0]);
+    }
+    catch (const cl::Error& clError)
+    {
+        throw ClRuntimeUnavailableException(boost::str(boost::format(
+            "Could not initialize the CL runtime. Error description: %1%. CL error code: %2%"
+        ) % clError.what() % clError.err()));
+    }
+
+    // Removes the use of the global CL context.
+    cl::Context::setDefault(cl::Context{});
+    BOOST_ASSERT(cl::Context::getDefault()() == NULL);
+
+    // Removes the use of the global CL command queue.
+    cl::CommandQueue::setDefault(cl::CommandQueue{});
+    BOOST_ASSERT(cl::CommandQueue::getDefault()() == NULL);
+
+    // Always load the OpenCL runtime.
+    LoadOpenClRuntime();
+#endif
+}
+
+ClContextControl::~ClContextControl()
+{
+#ifdef ARMCOMPUTECL_ENABLED
+    // Loads the OpenCL runtime without the tuned parameters to free the memory for them.
+    try
+    {
+        UnloadOpenClRuntime();
+    }
+    catch (const cl::Error& clError)
+    {
+        // This should not happen; if it does, the error is ignored.
+
+        // Coverity fix: BOOST_LOG_TRIVIAL (previously used here to report the error) may throw an
+        // exception of type std::length_error.
+        // Using stderr instead in this context as there is no point in nesting try-catch blocks here.
+        std::cerr << "A CL error occurred unloading the runtime tuner parameters: "
+                  << clError.what() << ". CL error code is: " << clError.err() << std::endl;
+    }
+#endif
+}
+
+void ClContextControl::LoadOpenClRuntime()
+{
+    DoLoadOpenClRuntime(true);
+}
+
+void ClContextControl::UnloadOpenClRuntime()
+{
+    DoLoadOpenClRuntime(false);
+}
+
+void ClContextControl::DoLoadOpenClRuntime(bool useTunedParameters)
+{
+#ifdef ARMCOMPUTECL_ENABLED
+    cl::Device device = cl::Device::getDefault();
+    cl::Context context;
+    cl::CommandQueue commandQueue;
+
+    if (arm_compute::CLScheduler::get().context()() != NULL)
+    {
+        // Waits for all queued CL requests to finish before reinitialising the scheduler.
+        arm_compute::CLScheduler::get().sync();
+    }
+
+    try
+    {
+        arm_compute::CLKernelLibrary::get().clear_programs_cache();
+        // Initialises the scheduler with a dummy context to release the LLVM data (which only happens when there are
+        // no context references); it is initialised again, with a proper context, later.
+        arm_compute::CLScheduler::get().init(context, commandQueue, device);
+        arm_compute::CLKernelLibrary::get().init(".", context, device);
+
+        {
+            //
+            // Here we replace the context with a new one. Because of the scope
+            // of the leak checks, the disposal of the original context is not
+            // counted, but the creation of the replacement is, so it would be
+            // flagged as a memory leak. The following line prevents that from
+            // happening.
+            //
+            ARMNN_DISABLE_LEAK_CHECKING_IN_SCOPE();
+            context = cl::Context(device);
+        }
+
+        // NOTE: In this specific case profiling has to be enabled on the command queue
+        // in order for the CLTuner to work.
+        bool profilingNeededForClTuner = useTunedParameters && m_clTunedParameters &&
+            m_clTunedParameters->m_Mode == IGpuAccTunedParameters::Mode::UpdateTunedParameters;
+
+        if (m_ProfilingEnabled || profilingNeededForClTuner)
+        {
+            // Creates a new queue with profiling enabled.
+            commandQueue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE);
+        }
+        else
+        {
+            // Uses the default queue.
+            commandQueue = cl::CommandQueue(context, device);
+        }
+    }
+    catch (const cl::Error& clError)
+    {
+        throw ClRuntimeUnavailableException(boost::str(boost::format(
+            "Could not initialize the CL runtime. Error description: %1%. CL error code: %2%"
+        ) % clError.what() % clError.err()));
+    }
+
+    // Note: the first argument (the path to the CL source code) is ignored, as the kernels should be embedded in
+    // ARM Compute.
+ arm_compute::CLKernelLibrary::get().init(".", context, device); + + arm_compute::ICLTuner* tuner = nullptr; + if (useTunedParameters && m_clTunedParameters) + { + tuner = &m_clTunedParameters->m_Tuner; + } + arm_compute::CLScheduler::get().init(context, commandQueue, device, tuner); +#endif +} + +void ClContextControl::ClearClCache() +{ + DoLoadOpenClRuntime(true); +} + +armnn::IGpuAccTunedParameters* IGpuAccTunedParameters::CreateRaw(armnn::IGpuAccTunedParameters::Mode mode) +{ + return new ClTunedParameters(mode); +} + +armnn::IGpuAccTunedParametersPtr IGpuAccTunedParameters::Create(armnn::IGpuAccTunedParameters::Mode mode) +{ + return IGpuAccTunedParametersPtr(CreateRaw(mode), &IGpuAccTunedParameters::Destroy); +} + +void IGpuAccTunedParameters::Destroy(IGpuAccTunedParameters* params) +{ + delete params; +} + +ClTunedParameters::ClTunedParameters(armnn::IGpuAccTunedParameters::Mode mode) + : m_Mode(mode) +#ifdef ARMCOMPUTECL_ENABLED + , m_Tuner(mode == ClTunedParameters::Mode::UpdateTunedParameters) +#endif +{ +} + +void ClTunedParameters::Load(const char* filename) +{ +#ifdef ARMCOMPUTECL_ENABLED + try + { + m_Tuner.load_from_file(filename); + } + catch (const std::exception& e) + { + throw armnn::Exception(std::string("Failed to load tuned parameters file '") + filename + "': " + + e.what()); + } +#endif +} + +void ClTunedParameters::Save(const char* filename) const +{ +#ifdef ARMCOMPUTECL_ENABLED + try + { + m_Tuner.save_to_file(filename); + } + catch (const std::exception& e) + { + throw armnn::Exception(std::string("Failed to save tuned parameters file to '") + filename + "': " + + e.what()); + } +#endif +} + +} // namespace armnn diff --git a/src/backends/cl/ClContextControl.hpp b/src/backends/cl/ClContextControl.hpp new file mode 100644 index 0000000000..5ac56423bd --- /dev/null +++ b/src/backends/cl/ClContextControl.hpp @@ -0,0 +1,62 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "armnn/IRuntime.hpp" + +#ifdef ARMCOMPUTECL_ENABLED +#include <arm_compute/runtime/CL/CLTuner.h> +#endif + +namespace armnn +{ + +class IGpuAccTunedParameters; +class ClTunedParameters; + +// ARM Compute OpenCL context control. +class ClContextControl +{ +public: + + ClContextControl(IGpuAccTunedParameters* clTunedParameters = nullptr, + bool profilingEnabled = false); + + virtual ~ClContextControl(); + + void LoadOpenClRuntime(); + + // Users should call this (after freeing all of the cl::Context objects they use) + // to release the cached memory used by the compute library. + void UnloadOpenClRuntime(); + + // Clear the CL cache, without losing the tuned parameter settings. + void ClearClCache(); + +private: + + void DoLoadOpenClRuntime(bool useTunedParameters); + + ClTunedParameters* m_clTunedParameters; + + bool m_ProfilingEnabled; +}; + +class ClTunedParameters : public IGpuAccTunedParameters +{ +public: + ClTunedParameters(armnn::IGpuAccTunedParameters::Mode mode); + + virtual void Load(const char* filename); + virtual void Save(const char* filename) const; + + Mode m_Mode; + +#ifdef ARMCOMPUTECL_ENABLED + arm_compute::CLTuner m_Tuner; +#endif +}; + +} // namespace armnn diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp new file mode 100644 index 0000000000..6c1940b02f --- /dev/null +++ b/src/backends/cl/ClLayerSupport.cpp @@ -0,0 +1,478 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "LayerSupportCommon.hpp" + +#include "ClLayerSupport.hpp" +#include "InternalTypes.hpp" +#include <armnn/Descriptors.hpp> +#include <armnn/Types.hpp> +#include <armnn/Tensor.hpp> + +#include <boost/core/ignore_unused.hpp> + +#ifdef ARMCOMPUTECL_ENABLED +#include "workloads/ClAdditionWorkload.hpp" +#include "workloads/ClActivationFloatWorkload.hpp" +#include "workloads/ClBatchNormalizationFloatWorkload.hpp" +#include "workloads/ClConvertFp16ToFp32Workload.hpp" +#include "workloads/ClConvertFp32ToFp16Workload.hpp" +#include "workloads/ClConvolution2dBaseWorkload.hpp" +#include "workloads/ClDepthwiseConvolutionBaseWorkload.hpp" +#include "workloads/ClDivisionFloatWorkload.hpp" +#include "workloads/ClL2NormalizationFloatWorkload.hpp" +#include "workloads/ClMultiplicationFloatWorkload.hpp" +#include "workloads/ClFullyConnectedWorkload.hpp" +#include "workloads/ClPadWorkload.hpp" +#include "workloads/ClPooling2dBaseWorkload.hpp" +#include "workloads/ClPermuteWorkload.hpp" +#include "workloads/ClNormalizationFloatWorkload.hpp" +#include "workloads/ClSoftmaxBaseWorkload.hpp" +#include "workloads/ClSubtractionWorkload.hpp" +#include "workloads/ClLstmFloatWorkload.hpp" +#endif + +using namespace boost; + +namespace armnn +{ +namespace +{ +template<unsigned int FilterSize> +bool IsMatchingSize2d(const TensorInfo& weightInfo) +{ + // Width & Height must match. + return (weightInfo.GetShape()[3] == FilterSize) && (weightInfo.GetShape()[2] == FilterSize); +} + +template<uint32_t ValidStride> +bool IsMatchingStride(uint32_t actualStride) +{ + return ValidStride == actualStride; +} + +template<uint32_t FirstStride, uint32_t SecondStride, uint32_t... ValidStrides> +bool IsMatchingStride(uint32_t actualStride) +{ + return IsMatchingStride<FirstStride>(actualStride) || IsMatchingStride<SecondStride, ValidStrides...>(actualStride); +}; + +bool IsClBackendSupported(std::string* reasonIfUnsupported) +{ +#if ARMCOMPUTECL_ENABLED + return true; +#else + if (reasonIfUnsupported != nullptr) + { + *reasonIfUnsupported = "The armnn library has been built without CL support"; + } + return false; +#endif +} + +#if ARMCOMPUTECL_ENABLED +#define FORWARD_CL_LAYER_SUPPORT_FUNC(expr) (expr) +#else +#define FORWARD_CL_LAYER_SUPPORT_FUNC(expr) IsClBackendSupported(reasonIfUnsupported) +#endif + +#if ARMCOMPUTECL_ENABLED +template<class FuncType, class... Args> +inline bool IsWorkloadSupported(FuncType&& func, std::string* reasonIfUnsupported, Args&&... args) +{ + arm_compute::Status aclStatus = func(std::forward<Args>(args)...); + const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); + if (!supported && reasonIfUnsupported) + { + *reasonIfUnsupported = aclStatus.error_description(); + } + return supported; +} + +#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \ + return IsWorkloadSupported(func, reasonIfUnsupported, __VA_ARGS__); +#else +#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \ + return IsClBackendSupported(reasonIfUnsupported); +#endif + +} //namespace + +template<typename FloatFunc, typename Uint8Func, typename ... Params> +bool IsSupportedForDataTypeCl(std::string* reasonIfUnsupported, + DataType dataType, + FloatFunc floatFuncPtr, + Uint8Func uint8FuncPtr, + Params&&... 
params) +{ + return IsClBackendSupported(reasonIfUnsupported) && + IsSupportedForDataTypeGeneric(reasonIfUnsupported, + dataType, + floatFuncPtr, + floatFuncPtr, + uint8FuncPtr, + std::forward<Params>(params)...); +} + +bool IsActivationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClActivationWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor); +} + +bool IsAdditionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return FORWARD_CL_LAYER_SUPPORT_FUNC(ClAdditionValidate(input0, + input1, + output, + reasonIfUnsupported)); +} + +bool IsBatchNormalizationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClBatchNormalizationValidate, + reasonIfUnsupported, + input, + output, + mean, + var, + beta, + gamma, + descriptor); +} + +bool IsConstantSupportedCl(const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeCl(reasonIfUnsupported, + output.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsClDirectConvolution2dSupported(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc) +{ + bool isSupported = false; + + bool strideXIsOneOrTwo = IsMatchingStride<1, 2>(desc.m_StrideX); + bool strideXIsThree = IsMatchingStride<3>(desc.m_StrideX); + + bool strideYIsOneOrTwo = IsMatchingStride<1, 2>(desc.m_StrideY); + bool strideYIsThree = IsMatchingStride<3>(desc.m_StrideY); + + bool strideIsOneOrTwo = strideXIsOneOrTwo && strideYIsOneOrTwo; + bool strideIsOneOrTwoOrThree = ( strideXIsOneOrTwo || strideXIsThree ) && ( strideYIsOneOrTwo || strideYIsThree ); + + // 1x1 convolution with strides of 1,2,3. + isSupported |= IsMatchingSize2d<1>(weightInfo) && ( strideIsOneOrTwoOrThree ); + + // 3x3 convolution with strides of 1,2. + isSupported |= IsMatchingSize2d<3>(weightInfo) && ( strideIsOneOrTwo ); + + // 5x5 convolution with strides of 1,2 + isSupported |= IsMatchingSize2d<5>(weightInfo) && ( strideIsOneOrTwo ); + + //Fall back to normal convolution for the asymmetric padding case. + if (desc.m_PadLeft != desc.m_PadRight || + desc.m_PadTop != desc.m_PadBottom) + { + //Direct convolution does not support asymmetric padding yet. 
+ isSupported = false; + } + + return isSupported; +} + +bool IsDirectConvolution2dParamsSupportedCl(std::string* reasonIfUnsupported, + const Convolution2dDescriptor& parameters, + const TensorInfo& weightInfo) +{ + return IsClDirectConvolution2dSupported(weightInfo, parameters); +} + +bool IsConvolution2dSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional<TensorInfo>& biases, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClConvolution2dWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor, + weights, + biases); +} + +bool IsDepthwiseConvolutionSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional<TensorInfo>& biases, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClDepthwiseConvolutionWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor, + weights, + biases); +} + +bool IsDivisionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClDivisionWorkloadValidate, + reasonIfUnsupported, + input0, + input1, + output); +} + +bool IsSubtractionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return FORWARD_CL_LAYER_SUPPORT_FUNC(ClSubtractionValidate(input0, + input1, + output, + reasonIfUnsupported)); +} + +bool IsFullyConnectedSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClFullyConnectedWorkloadValidate, + reasonIfUnsupported, + input, + output, + weights, + biases, + descriptor); +} + +bool IsInputSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeCl(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsL2NormalizationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const L2NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClL2NormalizationWorkloadValidate, reasonIfUnsupported, input, output, descriptor); +} + +bool IsMergerSupportedCl(const std::vector<const TensorInfo*> inputs, + const OriginsDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeCl(reasonIfUnsupported, + inputs[0]->GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsMultiplicationSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClMultiplicationWorkloadValidate, + reasonIfUnsupported, + input0, + input1, + output); +} + +bool IsNormalizationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClNormalizationWorkloadValidate, reasonIfUnsupported, input, output, descriptor); +} + +bool IsOutputSupportedCl(const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeCl(reasonIfUnsupported, + output.GetDataType(), 
+ &TrueFunc<>, + &TrueFunc<>); +} + +bool IsPadSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const PadDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + return FORWARD_CL_LAYER_SUPPORT_FUNC(ClPadValidate(input, output, descriptor, reasonIfUnsupported)); +} + +bool IsPermuteSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + ignore_unused(output); + FORWARD_WORKLOAD_VALIDATE_FUNC(ClPermuteWorkloadValidate, reasonIfUnsupported, descriptor); +} + +bool IsPooling2dSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClPooling2dWorkloadValidate, reasonIfUnsupported, input, output, descriptor); +} + +bool IsResizeBilinearSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeCl(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsSoftmaxSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + FORWARD_WORKLOAD_VALIDATE_FUNC(ClSoftmaxWorkloadValidate, reasonIfUnsupported, input, output); +} + +bool IsSplitterSupportedCl(const TensorInfo& input, + const ViewsDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeCl(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsFakeQuantizationSupportedCl(const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + ignore_unused(descriptor); + return false; +} + +bool IsReshapeSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + return true; +} + +bool IsFloorSupportedCl(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(output); + return IsClBackendSupported(reasonIfUnsupported) && + IsSupportedForDataTypeGeneric(reasonIfUnsupported, + input.GetDataType(), + &FalseFuncF16<>, + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsLstmSupportedCl(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor& descriptor, + const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, + const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClLstmFloatWorkloadValidate, reasonIfUnsupported, + input, outputStateIn, cellStateIn, scratchBuffer, outputStateOut, cellStateOut, + output, descriptor, inputToForgetWeights, inputToCellWeights, + 
inputToOutputWeights, recurrentToForgetWeights, + recurrentToCellWeights, recurrentToOutputWeights, + forgetGateBias, cellBias, outputGateBias, + inputToInputWeights, recurrentToInputWeights, + cellToInputWeights, inputGateBias, projectionWeights, + projectionBias, cellToForgetWeights, cellToOutputWeights); +} + +bool IsConvertFp16ToFp32SupportedCl(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClConvertFp16ToFp32WorkloadValidate, + reasonIfUnsupported, + input, + output, + reasonIfUnsupported); +} + +bool IsConvertFp32ToFp16SupportedCl(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClConvertFp32ToFp16WorkloadValidate, + reasonIfUnsupported, + input, + output, + reasonIfUnsupported); +} + +bool IsMeanSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const MeanDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + return false; +} + +} diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp new file mode 100644 index 0000000000..700d71801d --- /dev/null +++ b/src/backends/cl/ClLayerSupport.hpp @@ -0,0 +1,164 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include <armnn/DescriptorsFwd.hpp> +#include <armnn/Types.hpp> +#include <armnn/Tensor.hpp> +#include <armnn/ArmNN.hpp> + +#include <boost/optional.hpp> + +namespace armnn +{ +bool IsClDirectConvolution2dSupported(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc); +bool IsClDepthwiseConvolution2dDescParamsSupported(std::string* reasonIfUnsupported, + const DepthwiseConvolution2dDescriptor& parameters, + const TensorInfo& weights); + +bool IsActivationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsAdditionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsBatchNormalizationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsConstantSupportedCl(const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsConvolution2dSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional<TensorInfo>& biases, + std::string* reasonIfUnsupported = nullptr); + +bool IsDepthwiseConvolutionSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional<TensorInfo>& biases, + std::string* reasonIfUnsupported = nullptr); + +bool IsDivisionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsSubtractionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsFullyConnectedSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& 
descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsInputSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsL2NormalizationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const L2NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsLstmSupportedCl(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor& descriptor, + const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, + const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported = nullptr); + +bool IsMergerSupportedCl(const std::vector<const TensorInfo*> inputs, + const OriginsDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsMultiplicationSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsNormalizationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsOutputSupportedCl(const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsPadSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const PadDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsPermuteSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsPooling2dSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsResizeBilinearSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsSoftmaxSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsSplitterSupportedCl(const TensorInfo& input, + const ViewsDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsFakeQuantizationSupportedCl(const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsReshapeSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsFloorSupportedCl(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsMeanSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const MeanDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsConvertFp16ToFp32SupportedCl(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool 
IsConvertFp32ToFp16SupportedCl(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +} diff --git a/src/backends/cl/ClTensorHandle.hpp b/src/backends/cl/ClTensorHandle.hpp new file mode 100644 index 0000000000..556e4479b6 --- /dev/null +++ b/src/backends/cl/ClTensorHandle.hpp @@ -0,0 +1,141 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include <backends/OutputHandler.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> + +#include <arm_compute/runtime/CL/CLTensor.h> +#include <arm_compute/runtime/CL/CLSubTensor.h> +#include <arm_compute/runtime/CL/CLMemoryGroup.h> +#include <arm_compute/runtime/IMemoryGroup.h> +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/Coordinates.h> + +#include <boost/polymorphic_pointer_cast.hpp> + +namespace armnn +{ + + +class IClTensorHandle : public ITensorHandle +{ +public: + virtual arm_compute::ICLTensor& GetTensor() = 0; + virtual arm_compute::ICLTensor const& GetTensor() const = 0; + virtual arm_compute::DataType GetDataType() const = 0; + virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) = 0; +}; + +class ClTensorHandle : public IClTensorHandle +{ +public: + ClTensorHandle(const TensorInfo& tensorInfo) + { + armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo); + } + + ClTensorHandle(const TensorInfo& tensorInfo, DataLayout dataLayout) + { + armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout); + } + + arm_compute::CLTensor& GetTensor() override { return m_Tensor; } + arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; } + virtual void Allocate() override {armnn::armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_Tensor);} + + virtual void Manage() override + { + assert(m_MemoryGroup != nullptr); + m_MemoryGroup->manage(&m_Tensor); + } + + virtual const void* Map(bool blocking = true) const override + { + const_cast<arm_compute::CLTensor*>(&m_Tensor)->map(blocking); + return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); + } + virtual void Unmap() const override { const_cast<arm_compute::CLTensor*>(&m_Tensor)->unmap(); } + + virtual ITensorHandle::Type GetType() const override { return ITensorHandle::CL; } + + virtual ITensorHandle* GetParent() const override { return nullptr; } + + virtual arm_compute::DataType GetDataType() const override + { + return m_Tensor.info()->data_type(); + } + + virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override + { + m_MemoryGroup = boost::polymorphic_pointer_downcast<arm_compute::CLMemoryGroup>(memoryGroup); + } + + TensorShape GetStrides() const override + { + return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); + } + + TensorShape GetShape() const override + { + return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); + } +private: + arm_compute::CLTensor m_Tensor; + std::shared_ptr<arm_compute::CLMemoryGroup> m_MemoryGroup; +}; + +class ClSubTensorHandle : public IClTensorHandle +{ +public: + ClSubTensorHandle(IClTensorHandle* parent, + const arm_compute::TensorShape& shape, + const arm_compute::Coordinates& coords) + : m_Tensor(&parent->GetTensor(), shape, coords) + { + parentHandle = parent; + } + + arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; } + arm_compute::CLSubTensor const& GetTensor() const 
override { return m_Tensor; } + + virtual void Allocate() override {} + virtual void Manage() override {} + + virtual const void* Map(bool blocking = true) const override + { + const_cast<arm_compute::CLSubTensor*>(&m_Tensor)->map(blocking); + return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); + } + virtual void Unmap() const override { const_cast<arm_compute::CLSubTensor*>(&m_Tensor)->unmap(); } + + virtual ITensorHandle::Type GetType() const override { return ITensorHandle::CL; } + + virtual ITensorHandle* GetParent() const override { return parentHandle; } + + virtual arm_compute::DataType GetDataType() const override + { + return m_Tensor.info()->data_type(); + } + + virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>&) override {} + + TensorShape GetStrides() const override + { + return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); + } + + TensorShape GetShape() const override + { + return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); + } + +private: + mutable arm_compute::CLSubTensor m_Tensor; + ITensorHandle* parentHandle = nullptr; + +}; + +} diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp new file mode 100644 index 0000000000..46a96559bf --- /dev/null +++ b/src/backends/cl/ClWorkloadFactory.cpp @@ -0,0 +1,506 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#include "ClWorkloadFactory.hpp" + +#include <armnn/Exceptions.hpp> +#include <armnn/Utils.hpp> + +#include <string> +#include <backends/CpuTensorHandle.hpp> +#include <Layer.hpp> + +#ifdef ARMCOMPUTECL_ENABLED +#include <arm_compute/core/CL/CLKernelLibrary.h> +#include <arm_compute/runtime/CL/CLBufferAllocator.h> +#include <arm_compute/runtime/CL/CLScheduler.h> + +#include <backends/cl/workloads/ClWorkloads.hpp> + +#include <backends/MemCopyWorkload.hpp> +#include <backends/cl/ClTensorHandle.hpp> + +#include <memory/IPoolManager.hpp> +#endif + +#include <backends/MakeWorkloadHelper.hpp> + +#include <boost/polymorphic_cast.hpp> +#include <boost/format.hpp> +#include <boost/log/trivial.hpp> + +namespace armnn +{ + +bool ClWorkloadFactory::IsLayerSupported(const Layer& layer, + boost::optional<DataType> dataType, + std::string& outReasonIfUnsupported) +{ + return IWorkloadFactory::IsLayerSupported(Compute::GpuAcc, layer, dataType, outReasonIfUnsupported); +} + +#ifdef ARMCOMPUTECL_ENABLED + +ClWorkloadFactory::ClWorkloadFactory() +: m_MemoryManager(std::make_unique<arm_compute::CLBufferAllocator>()) +{ +} + +std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +{ + std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo); + tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup()); + + return tensorHandle; +} + +std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo, + DataLayout dataLayout) const +{ + std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo, dataLayout); + tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup()); + + return tensorHandle; +} + +std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const +{ + BOOST_ASSERT(parent.GetType() == ITensorHandle::CL); + + arm_compute::Coordinates coords; + 
arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape); + + coords.set_num_dimensions(subTensorShape.GetNumDimensions()); + for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++) + { + // Arm compute indexes tensor coords in reverse order. + unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1; + coords.set(i, boost::numeric_cast<int>(subTensorOrigin[revertedIndex])); + } + + return std::make_unique<ClSubTensorHandle>( + boost::polymorphic_downcast<IClTensorHandle*>(&parent), shape, coords); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClActivationFloatWorkload, ClActivationUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClSoftmaxFloatWorkload, ClSoftmaxUint8Workload>(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClSplitterFloatWorkload, ClSplitterUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClMergerFloatWorkload, ClMergerUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateFullyConnected( + const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<ClFullyConnectedWorkload, ClFullyConnectedWorkload>(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClPermuteFloatWorkload, ClPermuteUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClPooling2dFloatWorkload, ClPooling2dUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClConvolution2dFloatWorkload, ClConvolution2dUint8Workload>(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<ClDepthwiseConvolutionFloatWorkload, ClDepthwiseConvolutionUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor, + 
const WorkloadInfo& info) const +{ + return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClAdditionWorkload<armnn::DataType::Float16, armnn::DataType::Float32>, + ClAdditionWorkload<armnn::DataType::QuantisedAsymm8>>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateMultiplication( + const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<ClMultiplicationFloatWorkload, ClMultiplicationFloatWorkload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateDivision( + const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<ClDivisionFloatWorkload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClSubtractionWorkload<armnn::DataType::Float16, armnn::DataType::Float32>, + ClSubtractionWorkload<armnn::DataType::QuantisedAsymm8>>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateBatchNormalization( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0]) + { + throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemCopy workload"); + } + + return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateResizeBilinear( + const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClResizeBilinearFloatWorkload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFakeQuantization( + const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClConstantFloatWorkload, ClConstantUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClReshapeFloatWorkload, ClReshapeUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(descriptor, info); +} + 
+std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp16ToFp32( + const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return std::make_unique<ClConvertFp16ToFp32Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp32ToFp16( + const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return std::make_unique<ClConvertFp32ToFp16Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClPadWorkload<armnn::DataType::Float16, armnn::DataType::Float32>, + ClPadWorkload<armnn::DataType::QuantisedAsymm8>>(descriptor, info); +} + +void ClWorkloadFactory::Finalize() +{ + m_MemoryManager.Finalize(); +} + +void ClWorkloadFactory::Release() +{ + m_MemoryManager.Release(); +} + +void ClWorkloadFactory::Acquire() +{ + m_MemoryManager.Acquire(); +} + +#else // #if ARMCOMPUTECL_ENABLED + +ClWorkloadFactory::ClWorkloadFactory() +{ +} + +std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +{ + return nullptr; +} + +std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo, + DataLayout dataLayout) const +{ + return nullptr; +} + +std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthwiseConvolution2d( + const 
DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateBatchNormalization( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp16ToFp32( + const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp32ToFp16( + const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +void ClWorkloadFactory::Finalize() +{ +} + +void ClWorkloadFactory::Release() +{ +} + +void ClWorkloadFactory::Acquire() +{ +} + +#endif // #if ARMCOMPUTECL_ENABLED + +} // namespace armnn diff --git a/src/backends/cl/ClWorkloadFactory.hpp b/src/backends/cl/ClWorkloadFactory.hpp new file mode 100644 index 0000000000..59ae3b343a --- /dev/null +++ 
b/src/backends/cl/ClWorkloadFactory.hpp @@ -0,0 +1,139 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include <backends/OutputHandler.hpp> + +#include <armnn/IRuntime.hpp> +#include <boost/optional.hpp> + +#include "memory/BaseMemoryManager.hpp" + +namespace armnn +{ + +// ARM Compute OpenCL workload factory. +class ClWorkloadFactory : public IWorkloadFactory +{ +public: + ClWorkloadFactory(); + + virtual Compute GetCompute() const override { return Compute::GpuAcc; } + + static bool IsLayerSupported(const Layer& layer, boost::optional<DataType> dataType, + std::string& outReasonIfUnsupported); + + virtual bool SupportsSubTensors() const override { return true; } + + virtual std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const override; + + virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override; + + virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, + DataLayout dataLayout) const override; + + virtual std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateNormalization(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> 
CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateDivision(const DivisionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateSubtraction(const SubtractionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateMean(const MeanQueueDescriptor& descriptor, + const WorkloadInfo& Info) const override; + + virtual std::unique_ptr<IWorkload> CreatePad(const PadQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual void Finalize() override; + + virtual void Release() override; + + virtual void Acquire() override; + +private: + +#ifdef ARMCOMPUTECL_ENABLED + mutable ClMemoryManager m_MemoryManager; +#endif +}; + +} // namespace armnn diff --git a/src/backends/cl/backend.cmake b/src/backends/cl/backend.cmake new file mode 100644 index 0000000000..1af88e3c9b --- /dev/null +++ b/src/backends/cl/backend.cmake @@ -0,0 +1,13 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. +# SPDX-License-Identifier: MIT +# + +if(ARMCOMPUTECL) + add_subdirectory(${PROJECT_SOURCE_DIR}/src/backends/cl) + list(APPEND armnnLibraries armnnClBackend armnnClBackendWorkloads) +else() + message("CL backend is disabled") + add_subdirectory(${PROJECT_SOURCE_DIR}/src/backends/cl) + list(APPEND armnnLibraries armnnClBackend) +endif() diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk new file mode 100644 index 0000000000..2418a24249 --- /dev/null +++ b/src/backends/cl/backend.mk @@ -0,0 +1,51 @@ +# +# Copyright © 2017 ARM Ltd. All rights reserved. 
diff --git a/src/backends/cl/backend.cmake b/src/backends/cl/backend.cmake
new file mode 100644
index 0000000000..1af88e3c9b
--- /dev/null
+++ b/src/backends/cl/backend.cmake
@@ -0,0 +1,14 @@
+#
+# Copyright © 2017 Arm Ltd. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+
+if(ARMCOMPUTECL)
+    add_subdirectory(${PROJECT_SOURCE_DIR}/src/backends/cl)
+    list(APPEND armnnLibraries armnnClBackend armnnClBackendWorkloads)
+else()
+    message("CL backend is disabled")
+    # Still build the backend interface library so the rest of ArmNN links unchanged.
+    add_subdirectory(${PROJECT_SOURCE_DIR}/src/backends/cl)
+    list(APPEND armnnLibraries armnnClBackend)
+endif()
diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk
new file mode 100644
index 0000000000..2418a24249
--- /dev/null
+++ b/src/backends/cl/backend.mk
@@ -0,0 +1,51 @@
+#
+# Copyright © 2017 Arm Ltd. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+
+# BACKEND_SOURCES contains the list of files to be included
+# in the Android build and it is picked up by the Android.mk
+# file in the root of ArmNN
+
+BACKEND_SOURCES := \
+        ClContextControl.cpp \
+        ClLayerSupport.cpp \
+        ClWorkloadFactory.cpp \
+        workloads/ClActivationFloatWorkload.cpp \
+        workloads/ClActivationUint8Workload.cpp \
+        workloads/ClAdditionWorkload.cpp \
+        workloads/ClBaseConstantWorkload.cpp \
+        workloads/ClBatchNormalizationFloatWorkload.cpp \
+        workloads/ClConstantFloatWorkload.cpp \
+        workloads/ClConstantUint8Workload.cpp \
+        workloads/ClConvertFp16ToFp32Workload.cpp \
+        workloads/ClConvertFp32ToFp16Workload.cpp \
+        workloads/ClConvolution2dBaseWorkload.cpp \
+        workloads/ClConvolution2dFloatWorkload.cpp \
+        workloads/ClConvolution2dUint8Workload.cpp \
+        workloads/ClDepthwiseConvolutionBaseWorkload.cpp \
+        workloads/ClDepthwiseConvolutionFloatWorkload.cpp \
+        workloads/ClDepthwiseConvolutionUint8Workload.cpp \
+        workloads/ClDivisionFloatWorkload.cpp \
+        workloads/ClFloorFloatWorkload.cpp \
+        workloads/ClFullyConnectedWorkload.cpp \
+        workloads/ClL2NormalizationFloatWorkload.cpp \
+        workloads/ClLstmFloatWorkload.cpp \
+        workloads/ClMergerFloatWorkload.cpp \
+        workloads/ClMergerUint8Workload.cpp \
+        workloads/ClMultiplicationFloatWorkload.cpp \
+        workloads/ClNormalizationFloatWorkload.cpp \
+        workloads/ClPadWorkload.cpp \
+        workloads/ClPermuteWorkload.cpp \
+        workloads/ClPooling2dBaseWorkload.cpp \
+        workloads/ClPooling2dFloatWorkload.cpp \
+        workloads/ClPooling2dUint8Workload.cpp \
+        workloads/ClReshapeFloatWorkload.cpp \
+        workloads/ClReshapeUint8Workload.cpp \
+        workloads/ClResizeBilinearFloatWorkload.cpp \
+        workloads/ClSoftmaxBaseWorkload.cpp \
+        workloads/ClSoftmaxFloatWorkload.cpp \
+        workloads/ClSoftmaxUint8Workload.cpp \
+        workloads/ClSplitterFloatWorkload.cpp \
+        workloads/ClSplitterUint8Workload.cpp \
+        workloads/ClSubtractionWorkload.cpp
diff --git a/src/backends/cl/test/CMakeLists.txt b/src/backends/cl/test/CMakeLists.txt
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/src/backends/cl/test/CMakeLists.txt
diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt
new file mode 100644
index 0000000000..066c37f083
--- /dev/null
+++ b/src/backends/cl/workloads/CMakeLists.txt
@@ -0,0 +1,92 @@
+#
+# Copyright © 2017 Arm Ltd. All rights reserved.
+# SPDX-License-Identifier: MIT +# + +list(APPEND armnnClBackendWorkloads_sources + ClActivationFloatWorkload.cpp + ClActivationFloatWorkload.hpp + ClActivationUint8Workload.cpp + ClActivationUint8Workload.hpp + ClAdditionWorkload.cpp + ClAdditionWorkload.hpp + ClBaseConstantWorkload.cpp + ClBaseConstantWorkload.hpp + ClBaseMergerWorkload.hpp + ClBaseSplitterWorkload.hpp + ClBatchNormalizationFloatWorkload.cpp + ClBatchNormalizationFloatWorkload.hpp + ClConstantFloatWorkload.cpp + ClConstantFloatWorkload.hpp + ClConstantUint8Workload.cpp + ClConstantUint8Workload.hpp + ClConvertFp16ToFp32Workload.cpp + ClConvertFp16ToFp32Workload.hpp + ClConvertFp32ToFp16Workload.cpp + ClConvertFp32ToFp16Workload.hpp + ClConvolution2dBaseWorkload.cpp + ClConvolution2dBaseWorkload.hpp + ClConvolution2dFloatWorkload.cpp + ClConvolution2dFloatWorkload.hpp + ClConvolution2dUint8Workload.cpp + ClConvolution2dUint8Workload.hpp + ClDepthwiseConvolutionBaseWorkload.cpp + ClDepthwiseConvolutionBaseWorkload.hpp + ClDepthwiseConvolutionFloatWorkload.cpp + ClDepthwiseConvolutionFloatWorkload.hpp + ClDepthwiseConvolutionUint8Workload.cpp + ClDepthwiseConvolutionUint8Workload.hpp + ClDivisionFloatWorkload.cpp + ClDivisionFloatWorkload.hpp + ClFloorFloatWorkload.cpp + ClFloorFloatWorkload.hpp + ClFullyConnectedWorkload.cpp + ClFullyConnectedWorkload.hpp + ClL2NormalizationFloatWorkload.cpp + ClL2NormalizationFloatWorkload.hpp + ClLstmFloatWorkload.cpp + ClLstmFloatWorkload.hpp + ClMergerFloatWorkload.cpp + ClMergerFloatWorkload.hpp + ClMergerUint8Workload.cpp + ClMergerUint8Workload.hpp + ClMultiplicationFloatWorkload.cpp + ClMultiplicationFloatWorkload.hpp + ClNormalizationFloatWorkload.cpp + ClNormalizationFloatWorkload.hpp + ClPadWorkload.cpp + ClPadWorkload.hpp + ClPermuteWorkload.cpp + ClPermuteWorkload.hpp + ClPooling2dBaseWorkload.cpp + ClPooling2dBaseWorkload.hpp + ClPooling2dFloatWorkload.cpp + ClPooling2dFloatWorkload.hpp + ClPooling2dUint8Workload.cpp + ClPooling2dUint8Workload.hpp + ClReshapeFloatWorkload.cpp + ClReshapeFloatWorkload.hpp + ClReshapeUint8Workload.cpp + ClReshapeUint8Workload.hpp + ClResizeBilinearFloatWorkload.cpp + ClResizeBilinearFloatWorkload.hpp + ClSoftmaxBaseWorkload.cpp + ClSoftmaxBaseWorkload.hpp + ClSoftmaxFloatWorkload.cpp + ClSoftmaxFloatWorkload.hpp + ClSoftmaxUint8Workload.cpp + ClSoftmaxUint8Workload.hpp + ClSplitterFloatWorkload.cpp + ClSplitterFloatWorkload.hpp + ClSplitterUint8Workload.cpp + ClSplitterUint8Workload.hpp + ClSubtractionWorkload.cpp + ClSubtractionWorkload.hpp + ClWorkloads.hpp + ClWorkloadUtils.hpp +) + +add_library(armnnClBackendWorkloads STATIC ${armnnClBackendWorkloads_sources}) +target_include_directories(armnnClBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_include_directories(armnnClBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) +target_include_directories(armnnClBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) diff --git a/src/backends/cl/workloads/ClActivationFloatWorkload.cpp b/src/backends/cl/workloads/ClActivationFloatWorkload.cpp new file mode 100644 index 0000000000..cbaac9d226 --- /dev/null +++ b/src/backends/cl/workloads/ClActivationFloatWorkload.cpp @@ -0,0 +1,56 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClActivationFloatWorkload.hpp" +#include <backends/cl/ClTensorHandle.hpp> +#include <backends/aclCommon/ArmComputeUtils.hpp> + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + const arm_compute::ActivationLayerInfo activationLayerInfo = + ConvertActivationDescriptorToAclActivationLayerInfo(descriptor); + + if (input.GetDataType() == DataType::QuantisedAsymm8 && + activationLayerInfo.activation() == arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC) + { + return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, + "CL: Logistic Activations unsupported with QAsymm8 data type."}; + } + + return arm_compute::CLActivationLayer::validate(&aclInput, + &aclOutput, + activationLayerInfo); +} + +ClActivationFloatWorkload::ClActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload<ActivationQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClActivationFloatWorkload", 1, 1); + + const arm_compute::ActivationLayerInfo activationLayerInfo = + ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters); + + arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + m_ActivationLayer.configure(&input, &output, activationLayerInfo); +} + +void ClActivationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClActivationFloatWorkload_Execute"); + m_ActivationLayer.run(); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClActivationFloatWorkload.hpp b/src/backends/cl/workloads/ClActivationFloatWorkload.hpp new file mode 100644 index 0000000000..cb560a791b --- /dev/null +++ b/src/backends/cl/workloads/ClActivationFloatWorkload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> + +#include <arm_compute/runtime/CL/CLFunctions.h> + +namespace armnn +{ +arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor); + +// Activation layer execution. +class ClActivationFloatWorkload : public FloatWorkload<ActivationQueueDescriptor> +{ +public: + ClActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLActivationLayer m_ActivationLayer; +}; + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClActivationUint8Workload.cpp b/src/backends/cl/workloads/ClActivationUint8Workload.cpp new file mode 100644 index 0000000000..ad6b73074b --- /dev/null +++ b/src/backends/cl/workloads/ClActivationUint8Workload.cpp @@ -0,0 +1,44 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClActivationUint8Workload.hpp"
+#include <backends/cl/ClLayerSupport.hpp>
+
+#include <backends/aclCommon/ArmComputeUtils.hpp>
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+ClActivationUint8Workload::ClActivationUint8Workload(const ActivationQueueDescriptor& descriptor,
+                                                     const WorkloadInfo& info)
+    : Uint8Workload<ActivationQueueDescriptor>(descriptor, info)
+{
+    auto activation = ConvertActivationFunctionToAclActivationFunction(m_Data.m_Parameters.m_Function);
+    arm_compute::ActivationLayerInfo layerInfo(activation,
+                                               m_Data.m_Parameters.m_A,
+                                               m_Data.m_Parameters.m_B);
+
+    m_Data.ValidateInputsOutputs("ClActivationUint8Workload", 1, 1);
+
+    arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    m_ActivationLayer.configure(&input, &output, layerInfo);
+}
+
+void ClActivationUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClActivationUint8Workload_Execute");
+
+    m_ActivationLayer.run();
+}
+
+} //namespace armnn
+
+
diff --git a/src/backends/cl/workloads/ClActivationUint8Workload.hpp b/src/backends/cl/workloads/ClActivationUint8Workload.hpp
new file mode 100644
index 0000000000..d0b7d3a78f
--- /dev/null
+++ b/src/backends/cl/workloads/ClActivationUint8Workload.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+// Activation layer execution.
+class ClActivationUint8Workload : public Uint8Workload<ActivationQueueDescriptor>
+{
+public:
+    ClActivationUint8Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLActivationLayer m_ActivationLayer;
+};
+
+} //namespace armnn
+
+
+
diff --git a/src/backends/cl/workloads/ClAdditionWorkload.cpp b/src/backends/cl/workloads/ClAdditionWorkload.cpp
new file mode 100644
index 0000000000..aa032e872c
--- /dev/null
+++ b/src/backends/cl/workloads/ClAdditionWorkload.cpp
@@ -0,0 +1,67 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClAdditionWorkload.hpp"
+
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+// Saturate rather than wrap on overflow; this matters for the quantised (QAsymm8) instantiation.
+static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
+
+template <armnn::DataType... T>
+ClAdditionWorkload<T...>::ClAdditionWorkload(const AdditionQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info)
+    : TypedWorkload<AdditionQueueDescriptor, T...>(descriptor, info)
+{
+    this->m_Data.ValidateInputsOutputs("ClAdditionWorkload", 2, 1);
+
+    arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
+    m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy);
+}
+
+template <armnn::DataType...
T> +void ClAdditionWorkload<T...>::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClAdditionWorkload_Execute"); + m_Layer.run(); +} + +bool ClAdditionValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + const arm_compute::Status aclStatus = arm_compute::CLArithmeticAddition::validate(&aclInput0Info, + &aclInput1Info, + &aclOutputInfo, + g_AclConvertPolicy); + + const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); + if (!supported && reasonIfUnsupported) + { + *reasonIfUnsupported = aclStatus.error_description(); + } + + return supported; +} + +} //namespace armnn + +template class armnn::ClAdditionWorkload<armnn::DataType::Float16, armnn::DataType::Float32>; +template class armnn::ClAdditionWorkload<armnn::DataType::QuantisedAsymm8>; diff --git a/src/backends/cl/workloads/ClAdditionWorkload.hpp b/src/backends/cl/workloads/ClAdditionWorkload.hpp new file mode 100644 index 0000000000..3e4ee26793 --- /dev/null +++ b/src/backends/cl/workloads/ClAdditionWorkload.hpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> + +#include <arm_compute/runtime/CL/CLFunctions.h> + +namespace armnn +{ + +template <armnn::DataType... dataTypes> +class ClAdditionWorkload : public TypedWorkload<AdditionQueueDescriptor, dataTypes...> +{ +public: + ClAdditionWorkload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLArithmeticAddition m_Layer; +}; + +bool ClAdditionValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported); +} //namespace armnn diff --git a/src/backends/cl/workloads/ClBaseConstantWorkload.cpp b/src/backends/cl/workloads/ClBaseConstantWorkload.cpp new file mode 100644 index 0000000000..2557020b59 --- /dev/null +++ b/src/backends/cl/workloads/ClBaseConstantWorkload.cpp @@ -0,0 +1,64 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClBaseConstantWorkload.hpp" +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <backends/cl/ClTensorHandle.hpp> +#include <backends/CpuTensorHandle.hpp> +#include <Half.hpp> + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +template class ClBaseConstantWorkload<DataType::Float16, DataType::Float32>; +template class ClBaseConstantWorkload<DataType::QuantisedAsymm8>; + +template<armnn::DataType... dataTypes> +void ClBaseConstantWorkload<dataTypes...>::Execute() const +{ + // The intermediate tensor held by the corresponding layer output handler can be initialised with the given data + // on the first inference, then reused for subsequent inferences. + // The initialisation cannot happen at workload construction time since the ACL kernel for the next layer may not + // have been configured at the time. 
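+    // In effect this is a lazily-initialised constant: the first Execute() copies
+    // m_LayerOutput into the CL tensor, and every later Execute() returns immediately.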
+ if (!m_RanOnce) + { + const ConstantQueueDescriptor& data = this->m_Data; + + BOOST_ASSERT(data.m_LayerOutput != nullptr); + arm_compute::CLTensor& output = static_cast<ClTensorHandle*>(data.m_Outputs[0])->GetTensor(); + arm_compute::DataType computeDataType = static_cast<ClTensorHandle*>(data.m_Outputs[0])->GetDataType(); + + switch (computeDataType) + { + case arm_compute::DataType::F16: + { + CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor<Half>()); + break; + } + case arm_compute::DataType::F32: + { + CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor<float>()); + break; + } + case arm_compute::DataType::QASYMM8: + { + CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor<uint8_t>()); + break; + } + default: + { + BOOST_ASSERT_MSG(false, "Unknown data type"); + break; + } + } + + m_RanOnce = true; + } +} + + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClBaseConstantWorkload.hpp b/src/backends/cl/workloads/ClBaseConstantWorkload.hpp new file mode 100644 index 0000000000..f7a23a9162 --- /dev/null +++ b/src/backends/cl/workloads/ClBaseConstantWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> + +#include <arm_compute/runtime/CL/CLFunctions.h> + +namespace armnn +{ +template <armnn::DataType... DataTypes> +class ClBaseConstantWorkload : public TypedWorkload<ConstantQueueDescriptor, DataTypes...> +{ +public: + ClBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) + : TypedWorkload<ConstantQueueDescriptor, DataTypes...>(descriptor, info) + , m_RanOnce(false) + { + } + + void Execute() const override; + +private: + mutable bool m_RanOnce; +}; + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClBaseMergerWorkload.hpp b/src/backends/cl/workloads/ClBaseMergerWorkload.hpp new file mode 100644 index 0000000000..f8ff6f9379 --- /dev/null +++ b/src/backends/cl/workloads/ClBaseMergerWorkload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> + +#include <arm_compute/runtime/CL/CLFunctions.h> + +namespace armnn +{ + +// Base class template providing an implementation of the Merger layer common to all data types. +template <armnn::DataType... DataTypes> +class ClBaseMergerWorkload : public TypedWorkload<MergerQueueDescriptor, DataTypes...> +{ +public: + using TypedWorkload<MergerQueueDescriptor, DataTypes...>::TypedWorkload; + + void Execute() const override + { + // With subtensors, merger is a no-op. + } +}; + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClBaseSplitterWorkload.hpp b/src/backends/cl/workloads/ClBaseSplitterWorkload.hpp new file mode 100644 index 0000000000..7fdcc84235 --- /dev/null +++ b/src/backends/cl/workloads/ClBaseSplitterWorkload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> + +#include <arm_compute/runtime/CL/CLFunctions.h> + +namespace armnn +{ + +// Base class template providing an implementation of the Splitter layer common to all data types. +template <armnn::DataType... 
DataTypes>
+class ClBaseSplitterWorkload : public TypedWorkload<SplitterQueueDescriptor, DataTypes...>
+{
+public:
+    using TypedWorkload<SplitterQueueDescriptor, DataTypes...>::TypedWorkload;
+
+    void Execute() const override
+    {
+        // With subtensors, splitter is a no-op.
+    }
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
new file mode 100644
index 0000000000..5bff7a63c9
--- /dev/null
+++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
@@ -0,0 +1,96 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClBatchNormalizationFloatWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+#include <backends/cl/ClLayerSupport.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input,
+                                                 const TensorInfo& output,
+                                                 const TensorInfo& mean,
+                                                 const TensorInfo& var,
+                                                 const TensorInfo& beta,
+                                                 const TensorInfo& gamma,
+                                                 const BatchNormalizationDescriptor& desc)
+{
+    const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+    const arm_compute::TensorInfo aclMeanInfo = BuildArmComputeTensorInfo(mean);
+    const arm_compute::TensorInfo aclVarInfo = BuildArmComputeTensorInfo(var);
+    const arm_compute::TensorInfo aclBetaInfo = BuildArmComputeTensorInfo(beta);
+    const arm_compute::TensorInfo aclGammaInfo = BuildArmComputeTensorInfo(gamma);
+
+    return arm_compute::CLBatchNormalizationLayer::validate(&aclInputInfo,
+                                                            &aclOutputInfo,
+                                                            &aclMeanInfo,
+                                                            &aclVarInfo,
+                                                            &aclBetaInfo,
+                                                            &aclGammaInfo,
+                                                            desc.m_Eps);
+}
+
+ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload(
+    const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : FloatWorkload<BatchNormalizationQueueDescriptor>(descriptor, info)
+{
+    m_Mean = std::make_unique<arm_compute::CLTensor>();
+    BuildArmComputeTensor(*m_Mean, m_Data.m_Mean->GetTensorInfo());
+
+    m_Variance = std::make_unique<arm_compute::CLTensor>();
+    BuildArmComputeTensor(*m_Variance, m_Data.m_Variance->GetTensorInfo());
+
+    m_Gamma = std::make_unique<arm_compute::CLTensor>();
+    BuildArmComputeTensor(*m_Gamma, m_Data.m_Gamma->GetTensorInfo());
+
+    m_Beta = std::make_unique<arm_compute::CLTensor>();
+    BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo());
+
+    m_Data.ValidateInputsOutputs("ClBatchNormalizationFloatWorkload", 1, 1);
+
+    arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    m_Layer.configure(&input,
+                      &output,
+                      m_Mean.get(),
+                      m_Variance.get(),
+                      m_Beta.get(),
+                      m_Gamma.get(),
+                      m_Data.m_Parameters.m_Eps);
+
+    InitializeArmComputeClTensorData(*m_Mean, m_Data.m_Mean);
+    InitializeArmComputeClTensorData(*m_Variance, m_Data.m_Variance);
+    InitializeArmComputeClTensorData(*m_Beta, m_Data.m_Beta);
+    InitializeArmComputeClTensorData(*m_Gamma, m_Data.m_Gamma);
+
+    // prepare() makes Compute Library take its own reshaped copies of the parameter tensors,
+    // so the staging CLTensors can be freed now instead of being kept alive for Execute().
+    m_Layer.prepare();
+    FreeUnusedTensors();
+}
+
+void
ClBatchNormalizationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClBatchNormalizationFloatWorkload_Execute"); + m_Layer.run(); +} + +void ClBatchNormalizationFloatWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_Mean); + FreeTensorIfUnused(m_Variance); + FreeTensorIfUnused(m_Gamma); + FreeTensorIfUnused(m_Beta); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp new file mode 100644 index 0000000000..804591c444 --- /dev/null +++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp @@ -0,0 +1,46 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> + +#include <arm_compute/runtime/CL/CLFunctions.h> + +namespace armnn +{ + +arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& desc); + +class ClBatchNormalizationFloatWorkload : public FloatWorkload<BatchNormalizationQueueDescriptor> +{ +public: + ClBatchNormalizationFloatWorkload(const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); + + using FloatWorkload<BatchNormalizationQueueDescriptor>::FloatWorkload; + void Execute() const override; + +private: + mutable arm_compute::CLBatchNormalizationLayer m_Layer; + + std::unique_ptr<arm_compute::CLTensor> m_Mean; + std::unique_ptr<arm_compute::CLTensor> m_Variance; + std::unique_ptr<arm_compute::CLTensor> m_Gamma; + std::unique_ptr<arm_compute::CLTensor> m_Beta; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + + + + diff --git a/src/backends/cl/workloads/ClConstantFloatWorkload.cpp b/src/backends/cl/workloads/ClConstantFloatWorkload.cpp new file mode 100644 index 0000000000..1565047c22 --- /dev/null +++ b/src/backends/cl/workloads/ClConstantFloatWorkload.cpp @@ -0,0 +1,18 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClConstantFloatWorkload.hpp" +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +void ClConstantFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClConstantFloatWorkload_Execute"); + ClBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClConstantFloatWorkload.hpp b/src/backends/cl/workloads/ClConstantFloatWorkload.hpp new file mode 100644 index 0000000000..0cbeaad9ea --- /dev/null +++ b/src/backends/cl/workloads/ClConstantFloatWorkload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClBaseConstantWorkload.hpp" + +namespace armnn +{ +class ClConstantFloatWorkload : public ClBaseConstantWorkload<DataType::Float16, DataType::Float32> +{ +public: + using ClBaseConstantWorkload<DataType::Float16, DataType::Float32>::ClBaseConstantWorkload; + void Execute() const override; +}; + + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClConstantUint8Workload.cpp b/src/backends/cl/workloads/ClConstantUint8Workload.cpp new file mode 100644 index 0000000000..a5ef0321cd --- /dev/null +++ b/src/backends/cl/workloads/ClConstantUint8Workload.cpp @@ -0,0 +1,18 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClConstantUint8Workload.hpp"
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+void ClConstantUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClConstantUint8Workload_Execute");
+    ClBaseConstantWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClConstantUint8Workload.hpp b/src/backends/cl/workloads/ClConstantUint8Workload.hpp
new file mode 100644
index 0000000000..30556dc0d6
--- /dev/null
+++ b/src/backends/cl/workloads/ClConstantUint8Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ClBaseConstantWorkload.hpp"
+
+namespace armnn
+{
+
+class ClConstantUint8Workload : public ClBaseConstantWorkload<DataType::QuantisedAsymm8>
+{
+public:
+    using ClBaseConstantWorkload<DataType::QuantisedAsymm8>::ClBaseConstantWorkload;
+    void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
new file mode 100644
index 0000000000..e7663b4ca4
--- /dev/null
+++ b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
@@ -0,0 +1,66 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClConvertFp16ToFp32Workload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
+
+ClConvertFp16ToFp32Workload::ClConvertFp16ToFp32Workload(
+    const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info) :
+    Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>(descriptor, info)
+{
+    this->m_Data.ValidateInputsOutputs("ClConvertFp16ToFp32Workload", 1, 1);
+
+    arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
+
+    m_Layer.configure(&input, &output, g_AclConvertPolicy, 0);
+}
+
+void ClConvertFp16ToFp32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp16ToFp32Workload_Execute");
+    m_Layer.run();
+}
+
+arm_compute::Status ClConvertFp16ToFp32WorkloadValidate(const TensorInfo& input,
+                                                        const TensorInfo& output,
+                                                        std::string* reasonIfUnsupported)
+{
+    if (input.GetDataType() != DataType::Float16)
+    {
+        if (reasonIfUnsupported) { *reasonIfUnsupported = "Input should be Float16"; }
+        return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Input should be Float16");
+    }
+    if (output.GetDataType() != DataType::Float32)
+    {
+        if (reasonIfUnsupported) { *reasonIfUnsupported = "Output should be Float32"; }
+        return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Output should be Float32");
+    }
+
+    const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+    const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate(
+        &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0);
+
+    const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
+    if (!supported && reasonIfUnsupported)
+    {
+        *reasonIfUnsupported = aclStatus.error_description();
+    }
+
+    return aclStatus;
+}
+
+
+} //namespace armnn
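Like the other workloads, the conversion exposes a validate helper that callers can probe before committing a layer to this backend. A minimal usage sketch (the shapes and the fallback handling are illustrative, not part of the patch):

    std::string reason;
    armnn::TensorInfo in(armnn::TensorShape({1, 16}), armnn::DataType::Float16);
    armnn::TensorInfo out(armnn::TensorShape({1, 16}), armnn::DataType::Float32);
    arm_compute::Status status = armnn::ClConvertFp16ToFp32WorkloadValidate(in, out, &reason);
    if (status.error_code() != arm_compute::ErrorCode::OK)
    {
        // reject the layer for GpuAcc and surface 'reason' to the caller
    }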
diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp
new file mode 100644
index 0000000000..b6447488f7
--- /dev/null
+++ b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+class ClConvertFp16ToFp32Workload : public Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>
+{
+public:
+
+    ClConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info);
+    virtual void Execute() const override;
+
+private:
+    mutable arm_compute::CLDepthConvertLayer m_Layer;
+};
+
+arm_compute::Status ClConvertFp16ToFp32WorkloadValidate(const TensorInfo& input,
+                                                        const TensorInfo& output,
+                                                        std::string* reasonIfUnsupported);
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
new file mode 100644
index 0000000000..2ae4adc424
--- /dev/null
+++ b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
@@ -0,0 +1,66 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClConvertFp32ToFp16Workload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
+
+ClConvertFp32ToFp16Workload::ClConvertFp32ToFp16Workload(
+    const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info) :
+    Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>(descriptor, info)
+{
+    this->m_Data.ValidateInputsOutputs("ClConvertFp32ToFp16Workload", 1, 1);
+
+    arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
+
+    m_Layer.configure(&input, &output, g_AclConvertPolicy, 0);
+}
+
+void ClConvertFp32ToFp16Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp32ToFp16Workload_Execute");
+    m_Layer.run();
+}
+
+arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input,
+                                                        const TensorInfo& output,
+                                                        std::string* reasonIfUnsupported)
+{
+    if (input.GetDataType() != DataType::Float32)
+    {
+        if (reasonIfUnsupported) { *reasonIfUnsupported = "Input should be Float32"; }
+        return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Input should be Float32");
+    }
+    if (output.GetDataType() != DataType::Float16)
+    {
+        if (reasonIfUnsupported) { *reasonIfUnsupported = "Output should be Float16"; }
+        return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Output should be Float16");
+    }
+
+    const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+    const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate(
+        &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0);
+
+    const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
+    if (!supported && reasonIfUnsupported)
+    {
+        *reasonIfUnsupported = aclStatus.error_description();
+    }
+
+    return aclStatus;
+}
+
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp
b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp new file mode 100644 index 0000000000..95d19905d7 --- /dev/null +++ b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> + +#include <arm_compute/runtime/CL/CLFunctions.h> + +namespace armnn +{ + +class ClConvertFp32ToFp16Workload : public Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor> +{ +public: + + ClConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::CLDepthConvertLayer m_Layer; +}; + +arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported); + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClConvolution2dBaseWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dBaseWorkload.cpp new file mode 100644 index 0000000000..58699a8287 --- /dev/null +++ b/src/backends/cl/workloads/ClConvolution2dBaseWorkload.cpp @@ -0,0 +1,48 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClConvolution2dBaseWorkload.hpp" +#include <backends/cl/ClLayerSupport.hpp> +#include <backends/cl/ClTensorHandle.hpp> +#include <backends/aclCommon/ArmComputeUtils.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> + +#include <arm_compute/runtime/CL/functions/CLConvolutionLayer.h> + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional<TensorInfo>& biases) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); + + arm_compute::TensorInfo aclBiasesInfo; + arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; + + if (descriptor.m_BiasEnabled) + { + BOOST_ASSERT(biases.is_initialized()); + + aclBiasesInfo = BuildArmComputeTensorInfo(biases.get(), descriptor.m_DataLayout); + optionalAclBiasesInfo = &aclBiasesInfo; + } + + arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor); + + return arm_compute::CLConvolutionLayer::validate(&aclInputInfo, + &aclWeightsInfo, + optionalAclBiasesInfo, + &aclOutputInfo, + layerInfo); +} + +} diff --git a/src/backends/cl/workloads/ClConvolution2dBaseWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dBaseWorkload.hpp new file mode 100644 index 0000000000..a983dba79a --- /dev/null +++ b/src/backends/cl/workloads/ClConvolution2dBaseWorkload.hpp @@ -0,0 +1,24 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <armnn/Tensor.hpp> +#include <armnn/Descriptors.hpp> + +#include <boost/optional.hpp> + +#include <arm_compute/core/Error.h> + +namespace armnn +{ + +arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional<TensorInfo>& biases); + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClConvolution2dFloatWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dFloatWorkload.cpp new file mode 100644 index 0000000000..813808345e --- /dev/null +++ b/src/backends/cl/workloads/ClConvolution2dFloatWorkload.cpp @@ -0,0 +1,81 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClConvolution2dFloatWorkload.hpp" +#include <backends/cl/ClTensorHandle.hpp> +#include <backends/CpuTensorHandle.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <backends/cl/ClLayerSupport.hpp> + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +ClConvolution2dFloatWorkload::ClConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) + : FloatWorkload<Convolution2dQueueDescriptor>(descriptor, info) + , m_ConvolutionLayer(memoryManager) +{ + + // todo: check tensor shapes match. + const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + m_KernelTensor = std::make_unique<arm_compute::CLTensor>(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo, descriptor.m_DataLayout); + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique<arm_compute::CLTensor>(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), descriptor.m_DataLayout); + } + + m_Data.ValidateInputsOutputs("ClConvolution2dFloat32Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_ConvolutionLayer.configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + + InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight); + + if (m_BiasTensor) + { + InitializeArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias); + } + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_ConvolutionLayer.prepare(); + FreeUnusedTensors(); +} + +void ClConvolution2dFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvolution2dFloat32Workload_Execute"); + + m_ConvolutionLayer.run(); +} + +void ClConvolution2dFloatWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClConvolution2dFloatWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dFloatWorkload.hpp new file mode 100644 index 0000000000..1f9710e1ea --- /dev/null +++ 
b/src/backends/cl/workloads/ClConvolution2dFloatWorkload.hpp @@ -0,0 +1,35 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> + +#include <arm_compute/runtime/CL/CLFunctions.h> +#include <arm_compute/runtime/MemoryManagerOnDemand.h> + +#include <memory> + +namespace armnn +{ + +class ClConvolution2dFloatWorkload : public FloatWorkload<Convolution2dQueueDescriptor> +{ +public: + ClConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + void Execute() const override; + +private: + mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer; + + std::unique_ptr<arm_compute::CLTensor> m_KernelTensor; + std::unique_ptr<arm_compute::CLTensor> m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + diff --git a/src/backends/cl/workloads/ClConvolution2dUint8Workload.cpp b/src/backends/cl/workloads/ClConvolution2dUint8Workload.cpp new file mode 100644 index 0000000000..d9b9dfd833 --- /dev/null +++ b/src/backends/cl/workloads/ClConvolution2dUint8Workload.cpp @@ -0,0 +1,81 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClConvolution2dUint8Workload.hpp" +#include <backends/cl/ClTensorHandle.hpp> +#include <backends/CpuTensorHandle.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <backends/cl/ClLayerSupport.hpp> + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +ClConvolution2dUint8Workload::ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) + : Uint8Workload<Convolution2dQueueDescriptor>(descriptor, info) + , m_ConvolutionLayer(memoryManager) +{ + // todo: check tensor shapes match + const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + m_KernelTensor = std::make_unique<arm_compute::CLTensor>(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo, descriptor.m_DataLayout); + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique<arm_compute::CLTensor>(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), descriptor.m_DataLayout); + } + + m_Data.ValidateInputsOutputs("ClConvolution2dUint8Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_ConvolutionLayer.configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + + InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight); + + if (m_BiasTensor) + { + InitializeArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias); + } + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_ConvolutionLayer.prepare(); + FreeUnusedTensors(); +} + +void ClConvolution2dUint8Workload::Execute() const +{ + 
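// Runs whichever convolution variant ACL selected at configure() time (e.g. GEMM-based or direct). +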
ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvolution2dUint8Workload_Execute"); + + m_ConvolutionLayer.run(); +} + +void ClConvolution2dUint8Workload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +} //namespace armnn + diff --git a/src/backends/cl/workloads/ClConvolution2dUint8Workload.hpp b/src/backends/cl/workloads/ClConvolution2dUint8Workload.hpp new file mode 100644 index 0000000000..1720ec935c --- /dev/null +++ b/src/backends/cl/workloads/ClConvolution2dUint8Workload.hpp @@ -0,0 +1,35 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> + +#include <arm_compute/runtime/CL/CLFunctions.h> +#include <arm_compute/runtime/MemoryManagerOnDemand.h> + +#include <memory> + +namespace armnn +{ + +class ClConvolution2dUint8Workload : public Uint8Workload<Convolution2dQueueDescriptor> +{ +public: + ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + void Execute() const override; + +private: + mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer; + + std::unique_ptr<arm_compute::CLTensor> m_KernelTensor; + std::unique_ptr<arm_compute::CLTensor> m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.cpp new file mode 100644 index 0000000000..5a036db922 --- /dev/null +++ b/src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.cpp @@ -0,0 +1,125 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClDepthwiseConvolutionBaseWorkload.hpp" + +#include "TypeUtils.hpp" + +#include <backends/aclCommon/ArmComputeUtils.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <backends/cl/ClTensorHandle.hpp> +#include <backends/CpuTensorHandle.hpp> + +namespace armnn +{ + +using namespace armcomputetensorutils; + +arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional<TensorInfo>& biases) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); + + arm_compute::TensorInfo aclBiasesInfo; + arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; + + if (descriptor.m_BiasEnabled) + { + BOOST_ASSERT(biases.is_initialized()); + + aclBiasesInfo = BuildArmComputeTensorInfo(biases.get(), descriptor.m_DataLayout); + optionalAclBiasesInfo = &aclBiasesInfo; + } + + const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor); + const unsigned int aclDepthMultiplier = weights.GetShape()[0]; + + return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo, + &aclWeightsInfo, + optionalAclBiasesInfo, + &aclOutputInfo, + aclPadStrideInfo, + aclDepthMultiplier); +} + +template<armnn::DataType... 
dataTypes> +ClDepthwiseConvolutionBaseWorkload<dataTypes...>::ClDepthwiseConvolutionBaseWorkload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload<DepthwiseConvolution2dQueueDescriptor, dataTypes...>(descriptor, info) +{ + auto& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + m_KernelTensor = std::make_unique<arm_compute::CLTensor>(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique<arm_compute::CLTensor>(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); + } + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + std::string name = std::string("ClDepthwiseConvolution") + + GetDataTypeName(m_Data.m_Weight->GetTensorInfo().GetDataType()) + "Workload"; + m_Data.ValidateInputsOutputs(name, 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + const unsigned int depthMultiplier = weightInfo.GetShape()[0]; + + //Check for optimisation opportunities. + bool use3x3Optimisation = (weightInfo.GetShape()[3] == 3) && (weightInfo.GetShape()[2] == 3); + if (use3x3Optimisation) + { + m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer3x3>(); + static_cast<arm_compute::CLDepthwiseConvolutionLayer3x3*>(m_DepthwiseConvolutionLayer.get())->configure( + &input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo, + depthMultiplier); + } + else + { + m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer>(); + static_cast<arm_compute::CLDepthwiseConvolutionLayer*>(m_DepthwiseConvolutionLayer.get())->configure( + &input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo, + depthMultiplier); + } + + BOOST_ASSERT(m_DepthwiseConvolutionLayer); +} + +template<armnn::DataType... dataTypes> +void ClDepthwiseConvolutionBaseWorkload<dataTypes...>::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +// Generate known implementations for linker +template class ClDepthwiseConvolutionBaseWorkload<DataType::Float16, DataType::Float32>; +template class ClDepthwiseConvolutionBaseWorkload<DataType::QuantisedAsymm8>; + +} // namespace armnn diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.hpp b/src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.hpp new file mode 100644 index 0000000000..9d5cde30b6 --- /dev/null +++ b/src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.hpp @@ -0,0 +1,40 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> +#include <boost/optional.hpp> + +#include <arm_compute/runtime/CL/CLFunctions.h> + +namespace armnn +{ + +arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional<TensorInfo>& biases); + +template<armnn::DataType... 
dataTypes> +class ClDepthwiseConvolutionBaseWorkload : public TypedWorkload<DepthwiseConvolution2dQueueDescriptor, dataTypes...> +{ +public: + using TypedWorkload<DepthwiseConvolution2dQueueDescriptor, dataTypes...>::m_Data; + + ClDepthwiseConvolutionBaseWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + +protected: + std::unique_ptr<arm_compute::IFunction> m_DepthwiseConvolutionLayer; + + std::unique_ptr<arm_compute::CLTensor> m_KernelTensor; + std::unique_ptr<arm_compute::CLTensor> m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.cpp new file mode 100644 index 0000000000..17ecd29307 --- /dev/null +++ b/src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.cpp @@ -0,0 +1,39 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClDepthwiseConvolutionFloatWorkload.hpp" + +#include <backends/CpuTensorHandle.hpp> + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClDepthwiseConvolutionFloatWorkload::ClDepthwiseConvolutionFloatWorkload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : ClDepthwiseConvolutionBaseWorkload(descriptor, info) +{ + InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight); + + if (m_BiasTensor) + { + InitializeArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias); + } + + m_DepthwiseConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void ClDepthwiseConvolutionFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthwiseConvolutionFloatWorkload_Execute"); + BOOST_ASSERT(m_DepthwiseConvolutionLayer); + + m_DepthwiseConvolutionLayer->run(); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.hpp b/src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.hpp new file mode 100644 index 0000000000..4f9d5f332e --- /dev/null +++ b/src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClDepthwiseConvolutionBaseWorkload.hpp" + +namespace armnn +{ + +class ClDepthwiseConvolutionFloatWorkload : public ClDepthwiseConvolutionBaseWorkload<DataType::Float16, + DataType::Float32> +{ +public: + ClDepthwiseConvolutionFloatWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + void Execute() const override; +}; + +} //namespace armnn + + + + diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.cpp new file mode 100644 index 0000000000..22922e4df6 --- /dev/null +++ b/src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.cpp @@ -0,0 +1,40 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClDepthwiseConvolutionUint8Workload.hpp"
+
+#include <backends/CpuTensorHandle.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+ClDepthwiseConvolutionUint8Workload::ClDepthwiseConvolutionUint8Workload(
+    const DepthwiseConvolution2dQueueDescriptor& descriptor,
+    const WorkloadInfo& info)
+    : ClDepthwiseConvolutionBaseWorkload(descriptor, info)
+{
+    InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight);
+
+    if (m_BiasTensor)
+    {
+        InitializeArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias);
+    }
+
+    m_DepthwiseConvolutionLayer->prepare();
+    FreeUnusedTensors();
+}
+
+void ClDepthwiseConvolutionUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthwiseConvolutionUint8Workload_Execute");
+    BOOST_ASSERT(m_DepthwiseConvolutionLayer);
+
+    m_DepthwiseConvolutionLayer->run();
+}
+
+} //namespace armnn
+
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.hpp b/src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.hpp
new file mode 100644
index 0000000000..b9f676de94
--- /dev/null
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.hpp
@@ -0,0 +1,23 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ClDepthwiseConvolutionBaseWorkload.hpp"
+
+namespace armnn
+{
+
+class ClDepthwiseConvolutionUint8Workload : public ClDepthwiseConvolutionBaseWorkload<DataType::QuantisedAsymm8>
+{
+public:
+    ClDepthwiseConvolutionUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
+                                        const WorkloadInfo& info);
+    void Execute() const override;
+};
+
+} //namespace armnn
+
+
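Both depthwise variants funnel through ClDepthwiseConvolutionWorkloadValidate above; note that the bias argument is a boost::optional, left empty when the layer has no bias. A hedged sketch of the caller's side (the TensorInfos and descriptor are assumed to be populated from the graph):

    // assuming 'input', 'output' and 'weights' TensorInfos and a
    // DepthwiseConvolution2dDescriptor 'desc' with m_BiasEnabled == false
    boost::optional<armnn::TensorInfo> noBias; // empty optional: no bias tensor
    arm_compute::Status status =
        armnn::ClDepthwiseConvolutionWorkloadValidate(input, output, desc, weights, noBias);
    bool supported = (status.error_code() == arm_compute::ErrorCode::OK);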
diff --git a/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp b/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp
new file mode 100644
index 0000000000..a2d8534682
--- /dev/null
+++ b/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp
@@ -0,0 +1,48 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClDivisionFloatWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0,
+                                               const TensorInfo& input1,
+                                               const TensorInfo& output)
+{
+    const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
+    const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
+    const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+    return arm_compute::CLArithmeticDivision::validate(&aclInput0, &aclInput1, &aclOutput);
+}
+
+
+ClDivisionFloatWorkload::ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor,
+                                                 const WorkloadInfo& info)
+    : FloatWorkload<DivisionQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("ClDivisionFloatWorkload", 2, 1);
+
+    arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    // Configure the division kernel with the resolved tensors.
+    m_ArithmeticDivision.configure(&input0, &input1, &output);
+}
+
+void ClDivisionFloatWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClDivisionFloatWorkload_Execute");
+
+    // Executes the layer.
+    m_ArithmeticDivision.run();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp b/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp
new file mode 100644
index 0000000000..1aa7ec69f6
--- /dev/null
+++ b/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp
@@ -0,0 +1,32 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0,
+                                               const TensorInfo& input1,
+                                               const TensorInfo& output);
+
+class ClDivisionFloatWorkload : public FloatWorkload<DivisionQueueDescriptor>
+{
+public:
+    ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor,
+                            const WorkloadInfo& info);
+
+    using FloatWorkload<DivisionQueueDescriptor>::FloatWorkload;
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLArithmeticDivision m_ArithmeticDivision;
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
new file mode 100644
index 0000000000..0a60fc3b5c
--- /dev/null
+++ b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
new file mode 100644
index 0000000000..0a60fc3b5c
--- /dev/null
+++ b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClFloorFloatWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+ClFloorFloatWorkload::ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : FloatWorkload<FloorQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("ClFloorFloatWorkload", 1, 1);
+
+    arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    m_Layer.configure(&input, &output);
+}
+
+void ClFloorFloatWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClFloorFloatWorkload_Execute");
+    m_Layer.run();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.hpp b/src/backends/cl/workloads/ClFloorFloatWorkload.hpp
new file mode 100644
index 0000000000..513862a4d7
--- /dev/null
+++ b/src/backends/cl/workloads/ClFloorFloatWorkload.hpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+class ClFloorFloatWorkload : public FloatWorkload<FloorQueueDescriptor>
+{
+public:
+    ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLFloor m_Layer;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
new file mode 100644
index 0000000000..b3a97f35f8
--- /dev/null
+++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
@@ -0,0 +1,96 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT +// + +#include "ClFullyConnectedWorkload.hpp" +#include <backends/cl/ClTensorHandle.hpp> +#include <backends/CpuTensorHandle.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <backends/aclCommon/ArmComputeUtils.hpp> +#include <backends/cl/ClLayerSupport.hpp> + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights); + + arm_compute::TensorInfo aclBiases; + arm_compute::TensorInfo *optionalAclBiases = nullptr; + if (descriptor.m_BiasEnabled) + { + aclBiases = BuildArmComputeTensorInfo(biases); + optionalAclBiases = &aclBiases; + } + + const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo = + ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor); + + return arm_compute::CLFullyConnectedLayer::validate(&aclInput, + &aclWeights, + optionalAclBiases, + &aclOutput, + fullyConnectedLayerInfo); +} + +ClFullyConnectedWorkload::ClFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) + : BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info) + , m_FullyConnectedLayer(memoryManager) +{ + m_WeightsTensor = std::make_unique<arm_compute::CLTensor>(); + BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo()); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasesTensor = std::make_unique<arm_compute::CLTensor>(); + BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo()); + } + + m_Data.ValidateInputsOutputs("ClFullyConnectedWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + // Construct + arm_compute::FullyConnectedLayerInfo fc_info; + fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix; + m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info); + + InitializeArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight); + + if (m_BiasesTensor) + { + InitializeArmComputeClTensorData(*m_BiasesTensor, m_Data.m_Bias); + } + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_FullyConnectedLayer.prepare(); + FreeUnusedTensors(); +} + +void ClFullyConnectedWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClFullyConnectedWorkload_Execute"); + m_FullyConnectedLayer.run(); +} + +void ClFullyConnectedWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_WeightsTensor); + FreeTensorIfUnused(m_BiasesTensor); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp new file mode 100644 index 0000000000..0c9047235b --- /dev/null +++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp @@ -0,0 +1,43 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> + +#include <arm_compute/runtime/CL/CLFunctions.h> +#include <arm_compute/runtime/MemoryManagerOnDemand.h> + +#include <memory> + +namespace armnn +{ + +arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor); + +class ClFullyConnectedWorkload : public armnn::BaseWorkload<armnn::FullyConnectedQueueDescriptor> +{ +public: + ClFullyConnectedWorkload(const armnn::FullyConnectedQueueDescriptor& descriptor, + const armnn::WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + + using armnn::BaseWorkload<armnn::FullyConnectedQueueDescriptor>::m_Data; + void Execute() const override; + +private: + mutable arm_compute::CLFullyConnectedLayer m_FullyConnectedLayer; + + std::unique_ptr<arm_compute::CLTensor> m_WeightsTensor; + std::unique_ptr<arm_compute::CLTensor> m_BiasesTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp new file mode 100644 index 0000000000..edc13bcfea --- /dev/null +++ b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp @@ -0,0 +1,50 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClL2NormalizationFloatWorkload.hpp" +#include <backends/cl/ClTensorHandle.hpp> +#include <backends/CpuTensorHandle.hpp> +#include <backends/aclCommon/ArmComputeUtils.hpp> + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const L2NormalizationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); + + arm_compute::NormalizationLayerInfo normalizationInfo = + CreateAclNormalizationLayerInfoForL2Normalization(input); + + return arm_compute::CLNormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo); +} + +ClL2NormalizationFloatWorkload::ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload<L2NormalizationQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClL2NormalizationFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(&input, &output, CreateAclNormalizationLayerInfoForL2Normalization(info.m_InputTensorInfos[0])); +} + +void ClL2NormalizationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClL2NormalizationFloatWorkload_Execute"); + m_Layer.run(); +} + +} //namespace armnn + + + diff --git a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp new file mode 100644 index 0000000000..f7b7911f4c --- /dev/null +++ b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp @@ -0,0 +1,35 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input,
+                                                      const TensorInfo& output,
+                                                      const L2NormalizationDescriptor& descriptor);
+
+class ClL2NormalizationFloatWorkload : public FloatWorkload<L2NormalizationQueueDescriptor>
+{
+public:
+    ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    void Execute() const override;
+
+private:
+    // Deliberately a CLNormalizationLayer rather than a CLL2Normalize function:
+    // the constructor configures it with an L2-specific NormalizationLayerInfo.
+    mutable arm_compute::CLNormalizationLayer m_Layer;
+};
+
+} //namespace armnn
+
+
+
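The LSTM workload that follows stages each weight and bias through the same lifecycle: build an empty CLTensor from the host tensor's shape, hand it to configure(), upload the data, and free the staging tensor once prepare() has let Compute Library copy or reshape what it needs. A condensed sketch of that pattern, assuming the helpers used throughout these files (StageConstTensor is a hypothetical name; the descriptor members are ConstCpuTensorHandle pointers, as elsewhere in this backend):

    #include <memory>
    #include <arm_compute/runtime/CL/CLTensor.h>
    #include <backends/CpuTensorHandle.hpp>
    #include <backends/aclCommon/ArmComputeTensorUtils.hpp>

    // Hypothetical helper illustrating the staging pattern; not part of the commit.
    void StageConstTensor(std::unique_ptr<arm_compute::CLTensor>& staging,
                          const armnn::ConstCpuTensorHandle* source)
    {
        staging = std::make_unique<arm_compute::CLTensor>();
        // Sets shape and type only; no CL memory is allocated yet.
        armcomputetensorutils::BuildArmComputeTensor(*staging, source->GetTensorInfo());
    }

    // Then, in the workload constructor:
    //   function.configure(..., staging.get(), ...);
    //   InitializeArmComputeClTensorData(*staging, source); // allocate and upload the data
    //   function.prepare();                                 // ACL copies/reshapes what it needs
    //   FreeTensorIfUnused(staging);                        // release the staging copy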
diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
new file mode 100644
index 0000000000..352698ad1b
--- /dev/null
+++ b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
@@ -0,0 +1,391 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClLstmFloatWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/cl/ClLayerSupport.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+
+#include <arm_compute/runtime/CL/functions/CLLSTMLayer.h>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : FloatWorkload<LstmQueueDescriptor>(descriptor, info)
+{
+    arm_compute::LSTMParams<arm_compute::ICLTensor> lstm_param;
+
+    // Basic parameters
+    m_InputToForgetWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+    BuildArmComputeTensor(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights->GetTensorInfo());
+
+    m_InputToCellWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+    BuildArmComputeTensor(*m_InputToCellWeightsTensor, m_Data.m_InputToCellWeights->GetTensorInfo());
+
+    m_InputToOutputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+    BuildArmComputeTensor(*m_InputToOutputWeightsTensor, m_Data.m_InputToOutputWeights->GetTensorInfo());
+
+    m_RecurrentToForgetWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+    BuildArmComputeTensor(*m_RecurrentToForgetWeightsTensor, m_Data.m_RecurrentToForgetWeights->GetTensorInfo());
+
+    m_RecurrentToCellWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+    BuildArmComputeTensor(*m_RecurrentToCellWeightsTensor, m_Data.m_RecurrentToCellWeights->GetTensorInfo());
+
+    m_RecurrentToOutputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+    BuildArmComputeTensor(*m_RecurrentToOutputWeightsTensor, m_Data.m_RecurrentToOutputWeights->GetTensorInfo());
+
+    m_ForgetGateBiasTensor = std::make_unique<arm_compute::CLTensor>();
+    BuildArmComputeTensor(*m_ForgetGateBiasTensor, m_Data.m_ForgetGateBias->GetTensorInfo());
+
+    m_CellBiasTensor = std::make_unique<arm_compute::CLTensor>();
+    BuildArmComputeTensor(*m_CellBiasTensor, m_Data.m_CellBias->GetTensorInfo());
+
+    m_OutputGateBiasTensor = std::make_unique<arm_compute::CLTensor>();
+    BuildArmComputeTensor(*m_OutputGateBiasTensor, m_Data.m_OutputGateBias->GetTensorInfo());
+
+    // For the logic behind these optional parameters, see the Android NN API LSTM specification.
+    if (!m_Data.m_Parameters.m_CifgEnabled)
+    {
+        m_InputToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+        BuildArmComputeTensor(*m_InputToInputWeightsTensor, m_Data.m_InputToInputWeights->GetTensorInfo());
+
+        m_RecurrentToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+        BuildArmComputeTensor(*m_RecurrentToInputWeightsTensor, m_Data.m_RecurrentToInputWeights->GetTensorInfo());
+
+        m_CellToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+        if (m_Data.m_CellToInputWeights != nullptr)
+        {
+            BuildArmComputeTensor(*m_CellToInputWeightsTensor, m_Data.m_CellToInputWeights->GetTensorInfo());
+        }
+
+        m_InputGateBiasTensor = std::make_unique<arm_compute::CLTensor>();
+        BuildArmComputeTensor(*m_InputGateBiasTensor, m_Data.m_InputGateBias->GetTensorInfo());
+
+        lstm_param.set_cifg_params(m_InputToInputWeightsTensor.get(),
+                                   m_RecurrentToInputWeightsTensor.get(),
+                                   m_Data.m_CellToInputWeights != nullptr ? m_CellToInputWeightsTensor.get() : nullptr,
+                                   m_InputGateBiasTensor.get());
+    }
+
+    if (m_Data.m_Parameters.m_ProjectionEnabled)
+    {
+        m_ProjectionWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+        BuildArmComputeTensor(*m_ProjectionWeightsTensor, m_Data.m_ProjectionWeights->GetTensorInfo());
+
+        m_ProjectionBiasTensor = std::make_unique<arm_compute::CLTensor>();
+        if (m_Data.m_ProjectionBias != nullptr)
+        {
+            BuildArmComputeTensor(*m_ProjectionBiasTensor, m_Data.m_ProjectionBias->GetTensorInfo());
+        }
+
+        lstm_param.set_projection_params(m_ProjectionWeightsTensor.get(),
+                                         m_Data.m_ProjectionBias != nullptr ? m_ProjectionBiasTensor.get() : nullptr);
+    }
+
+    if (m_Data.m_Parameters.m_PeepholeEnabled)
+    {
+        m_CellToForgetWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+        BuildArmComputeTensor(*m_CellToForgetWeightsTensor, m_Data.m_CellToForgetWeights->GetTensorInfo());
+
+        m_CellToOutputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+        BuildArmComputeTensor(*m_CellToOutputWeightsTensor, m_Data.m_CellToOutputWeights->GetTensorInfo());
+
+        lstm_param.set_peephole_params(m_CellToForgetWeightsTensor.get(), m_CellToOutputWeightsTensor.get());
+    }
+
+    const arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    const arm_compute::ICLTensor& output_state_in = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    const arm_compute::ICLTensor& cell_state_in = static_cast<IClTensorHandle*>(m_Data.m_Inputs[2])->GetTensor();
+
+    arm_compute::ICLTensor& output_state_out = static_cast<IClTensorHandle*>(m_Data.m_Outputs[1])->GetTensor();
+    arm_compute::ICLTensor& cell_state_out = static_cast<IClTensorHandle*>(m_Data.m_Outputs[2])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[3])->GetTensor();
+
+    // Get the batch_size and the num_units from the cellStateIn dimensions.
+    const TensorInfo& inputTensorInfo = info.m_InputTensorInfos[2];
+    const unsigned int batch_size = boost::numeric_cast<unsigned int>(inputTensorInfo.GetShape()[0]);
+    const unsigned int num_units = boost::numeric_cast<unsigned int>(inputTensorInfo.GetShape()[1]);
+
+    m_ScratchBuffer = std::make_unique<arm_compute::CLTensor>();
+    if (m_Data.m_Parameters.m_CifgEnabled)
+    {
+        // 2D tensor with dimensions [num_units * 4, batch_size] with CIFG
+        armnn::TensorInfo scratchBuffer1({ batch_size, num_units * 4 }, DataType::Float32);
+        BuildArmComputeTensor(*m_ScratchBuffer, scratchBuffer1);
+    }
+    else
+    {
+        // scratch_buffer [num_units * 3, batch_size] without CIFG
+        armnn::TensorInfo scratchBuffer2({ batch_size, num_units * 3 }, DataType::Float32);
+        BuildArmComputeTensor(*m_ScratchBuffer, scratchBuffer2);
+    }
+
+    float cell_threshold = m_Data.m_Parameters.m_ClippingThresCell;
+    float projection_threshold = m_Data.m_Parameters.m_ClippingThresProj;
+
+    // Map the ArmNN activation function code onto an ACL ActivationLayerInfo; five cases are supported.
+    arm_compute::ActivationLayerInfo activationLayerInfo;
+    if (m_Data.m_Parameters.m_ActivationFunc == 0)
+    {
+        // No activation: leave activationLayerInfo default-constructed.
+    }
+    else if (m_Data.m_Parameters.m_ActivationFunc == 1)
+    {
+        activationLayerInfo = arm_compute::ActivationLayerInfo(
+            arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
+    }
+    else if (m_Data.m_Parameters.m_ActivationFunc == 3)
+    {
+        activationLayerInfo = arm_compute::ActivationLayerInfo(
+            arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0);
+    }
+    else if (m_Data.m_Parameters.m_ActivationFunc == 4)
+    {
+        activationLayerInfo = arm_compute::ActivationLayerInfo(
+            arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0);
+    }
+    else if (m_Data.m_Parameters.m_ActivationFunc == 6)
+    {
+        activationLayerInfo = arm_compute::ActivationLayerInfo(
+            arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC);
+    }
+    else
+    {
+        throw armnn::Exception("Unsupported activation function.");
+    }
+
+    m_LstmLayer.configure(&input, m_InputToForgetWeightsTensor.get(), m_InputToCellWeightsTensor.get(),
+                          m_InputToOutputWeightsTensor.get(), m_RecurrentToForgetWeightsTensor.get(),
+                          m_RecurrentToCellWeightsTensor.get(), m_RecurrentToOutputWeightsTensor.get(),
+                          m_ForgetGateBiasTensor.get(), m_CellBiasTensor.get(), m_OutputGateBiasTensor.get(),
+                          &output_state_in, &cell_state_in, m_ScratchBuffer.get(), &output_state_out,
+                          &cell_state_out, &output, lstm_param, activationLayerInfo,
+                          cell_threshold, projection_threshold);
+
+    armcomputetensorutils::InitialiseArmComputeTensorEmpty(*m_ScratchBuffer);
+
+    InitializeArmComputeClTensorData(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights);
+    InitializeArmComputeClTensorData(*m_InputToCellWeightsTensor, m_Data.m_InputToCellWeights);
+    InitializeArmComputeClTensorData(*m_InputToOutputWeightsTensor, m_Data.m_InputToOutputWeights);
+    InitializeArmComputeClTensorData(*m_RecurrentToForgetWeightsTensor, m_Data.m_RecurrentToForgetWeights);
+    InitializeArmComputeClTensorData(*m_RecurrentToCellWeightsTensor, m_Data.m_RecurrentToCellWeights);
+    InitializeArmComputeClTensorData(*m_RecurrentToOutputWeightsTensor, m_Data.m_RecurrentToOutputWeights);
+    InitializeArmComputeClTensorData(*m_ForgetGateBiasTensor, m_Data.m_ForgetGateBias);
+    InitializeArmComputeClTensorData(*m_CellBiasTensor, m_Data.m_CellBias);
+    InitializeArmComputeClTensorData(*m_OutputGateBiasTensor, m_Data.m_OutputGateBias);
+
+    if (!m_Data.m_Parameters.m_CifgEnabled)
+    {
+        InitializeArmComputeClTensorData(*m_InputToInputWeightsTensor, m_Data.m_InputToInputWeights);
+        InitializeArmComputeClTensorData(*m_RecurrentToInputWeightsTensor, m_Data.m_RecurrentToInputWeights);
+        if (m_Data.m_CellToInputWeights != nullptr)
+        {
+            InitializeArmComputeClTensorData(*m_CellToInputWeightsTensor, m_Data.m_CellToInputWeights);
+        }
+        InitializeArmComputeClTensorData(*m_InputGateBiasTensor, m_Data.m_InputGateBias);
+    }
+
+    if (m_Data.m_Parameters.m_ProjectionEnabled)
+    {
+        InitializeArmComputeClTensorData(*m_ProjectionWeightsTensor, m_Data.m_ProjectionWeights);
+        if (m_Data.m_ProjectionBias != nullptr)
+        {
+            InitializeArmComputeClTensorData(*m_ProjectionBiasTensor, m_Data.m_ProjectionBias);
+        }
+    }
+
+    if (m_Data.m_Parameters.m_PeepholeEnabled)
+    {
+        InitializeArmComputeClTensorData(*m_CellToForgetWeightsTensor,
m_Data.m_CellToForgetWeights); + InitializeArmComputeClTensorData(*m_CellToOutputWeightsTensor, m_Data.m_CellToOutputWeights); + } + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_LstmLayer.prepare(); + FreeUnusedTensors(); +} + +void ClLstmFloatWorkload::Execute() const +{ + m_LstmLayer.run(); +} + +arm_compute::Status ClLstmFloatWorkloadValidate(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor& descriptor, + const TensorInfo& inputToForgetWeights, + const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, + const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, + const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, + const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, + const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, + const TensorInfo* projectionWeights, + const TensorInfo* projectionBias, + const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights) +{ + arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info; + + // The inputs and the outputs + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn); + const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn); + const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer); + const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut); + const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + // Basic parameters + const arm_compute::TensorInfo aclInputToForgetWeightsInfo = BuildArmComputeTensorInfo(inputToForgetWeights); + const arm_compute::TensorInfo aclInputToCellWeightsInfo = BuildArmComputeTensorInfo(inputToCellWeights); + const arm_compute::TensorInfo aclInputToOutputWeightsInfo = BuildArmComputeTensorInfo(inputToOutputWeights); + const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo + = BuildArmComputeTensorInfo(recurrentToForgetWeights); + const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo + = BuildArmComputeTensorInfo(recurrentToCellWeights); + const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo + = BuildArmComputeTensorInfo(recurrentToOutputWeights); + const arm_compute::TensorInfo aclForgetGateBiasInfo = BuildArmComputeTensorInfo(forgetGateBias); + const arm_compute::TensorInfo aclCellBiasInfo = BuildArmComputeTensorInfo(cellBias); + const arm_compute::TensorInfo aclOutputGateBiasInfo = BuildArmComputeTensorInfo(outputGateBias); + + arm_compute::TensorInfo aclInputToInputWeightsInfo; + arm_compute::TensorInfo aclRecurrentToInputWeightsInfo; + arm_compute::TensorInfo aclCellToInputWeightsInfo; + arm_compute::TensorInfo aclInputGateBiasInfo; + arm_compute::TensorInfo aclProjectionWeightsInfo; + arm_compute::TensorInfo aclProjectionBiasInfo; + arm_compute::TensorInfo aclCellToForgetWeightsInfo; + arm_compute::TensorInfo 
aclCellToOutputWeightsInfo;
+
+    if (!descriptor.m_CifgEnabled)
+    {
+        armnn::TensorInfo inputToInputWInfo = *inputToInputWeights;
+        aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(inputToInputWInfo);
+        armnn::TensorInfo recurrentToInputWInfo = *recurrentToInputWeights;
+        aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(recurrentToInputWInfo);
+
+        if (cellToInputWeights != nullptr)
+        {
+            armnn::TensorInfo cellToInputWInfo = *cellToInputWeights;
+            aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(cellToInputWInfo);
+        }
+        armnn::TensorInfo inputGateBiasInfo = *inputGateBias;
+        aclInputGateBiasInfo = BuildArmComputeTensorInfo(inputGateBiasInfo);
+        lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo, &aclRecurrentToInputWeightsInfo,
+                                         cellToInputWeights != nullptr ? &aclCellToInputWeightsInfo : nullptr,
+                                         &aclInputGateBiasInfo);
+    }
+
+    if (descriptor.m_ProjectionEnabled)
+    {
+        const armnn::TensorInfo& projectionWInfo = *projectionWeights;
+        aclProjectionWeightsInfo = BuildArmComputeTensorInfo(projectionWInfo);
+
+        if (projectionBias != nullptr)
+        {
+            const armnn::TensorInfo& projectionBiasInfo = *projectionBias;
+            aclProjectionBiasInfo = BuildArmComputeTensorInfo(projectionBiasInfo);
+        }
+        lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
+                                               projectionBias != nullptr ? &aclProjectionBiasInfo : nullptr);
+    }
+
+    if (descriptor.m_PeepholeEnabled)
+    {
+        const armnn::TensorInfo& cellToForgetWInfo = *cellToForgetWeights;
+        aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(cellToForgetWInfo);
+        const armnn::TensorInfo& cellToOutputWInfo = *cellToOutputWeights;
+        aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(cellToOutputWInfo);
+        lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
+    }
+
+    float cell_threshold = descriptor.m_ClippingThresCell;
+    float projection_threshold = descriptor.m_ClippingThresProj;
+
+    // Map the ArmNN activation function code onto an ACL ActivationLayerInfo; five cases are supported.
+    arm_compute::ActivationLayerInfo activationLayerInfo;
+    if (descriptor.m_ActivationFunc == 0)
+    {
+        // No activation: leave activationLayerInfo default-constructed.
+    }
+    else if (descriptor.m_ActivationFunc == 1)
+    {
+        activationLayerInfo = arm_compute::ActivationLayerInfo(
+            arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
+    }
+    else if (descriptor.m_ActivationFunc == 3)
+    {
+        activationLayerInfo = arm_compute::ActivationLayerInfo(
+            arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0);
+    }
+    else if (descriptor.m_ActivationFunc == 4)
+    {
+        activationLayerInfo = arm_compute::ActivationLayerInfo(
+            arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0);
+    }
+    else if (descriptor.m_ActivationFunc == 6)
+    {
+        activationLayerInfo = arm_compute::ActivationLayerInfo(
+            arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC);
+    }
+    else
+    {
+        throw armnn::Exception("Unsupported activation function.");
+    }
+
+    return arm_compute::CLLSTMLayer::validate(&aclInputInfo, &aclInputToForgetWeightsInfo,
+                                              &aclInputToCellWeightsInfo,
+                                              &aclInputToOutputWeightsInfo,
+                                              &aclRecurrentToForgetWeightsInfo,
+                                              &aclRecurrentToCellWeightsInfo,
+                                              &aclRecurrentToOutputWeightsInfo,
+                                              &aclForgetGateBiasInfo,
+                                              &aclCellBiasInfo,
+                                              &aclOutputGateBiasInfo,
+                                              &aclOutputStateInInfo, &aclCellStateInInfo,
+                                              &aclScratchBufferInfo, &aclOutputStateOutInfo,
+                                              &aclCellStateOutInfo, &aclOutputInfo,
+                                              lstm_params_info, activationLayerInfo,
+                                              cell_threshold, projection_threshold);
+}
+
+void
ClLstmFloatWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_InputToInputWeightsTensor); + FreeTensorIfUnused(m_InputToForgetWeightsTensor); + FreeTensorIfUnused(m_InputToCellWeightsTensor); + FreeTensorIfUnused(m_InputToOutputWeightsTensor); + FreeTensorIfUnused(m_RecurrentToInputWeightsTensor); + FreeTensorIfUnused(m_RecurrentToForgetWeightsTensor); + FreeTensorIfUnused(m_RecurrentToCellWeightsTensor); + FreeTensorIfUnused(m_RecurrentToOutputWeightsTensor); + FreeTensorIfUnused(m_CellToInputWeightsTensor); + FreeTensorIfUnused(m_CellToForgetWeightsTensor); + FreeTensorIfUnused(m_CellToOutputWeightsTensor); + FreeTensorIfUnused(m_InputGateBiasTensor); + FreeTensorIfUnused(m_ForgetGateBiasTensor); + FreeTensorIfUnused(m_CellBiasTensor); + FreeTensorIfUnused(m_OutputGateBiasTensor); + FreeTensorIfUnused(m_ProjectionWeightsTensor); + FreeTensorIfUnused(m_ProjectionBiasTensor); + FreeTensorIfUnused(m_ScratchBuffer); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.hpp b/src/backends/cl/workloads/ClLstmFloatWorkload.hpp new file mode 100644 index 0000000000..352d774a99 --- /dev/null +++ b/src/backends/cl/workloads/ClLstmFloatWorkload.hpp @@ -0,0 +1,68 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> +#include <backends/WorkloadData.hpp> + +#include <arm_compute/runtime/CL/CLFunctions.h> + +namespace armnn +{ + +class ClLstmFloatWorkload : public FloatWorkload<LstmQueueDescriptor> +{ +public: + ClLstmFloatWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLLSTMLayer m_LstmLayer; + + std::unique_ptr<arm_compute::CLTensor> m_InputToInputWeightsTensor; + std::unique_ptr<arm_compute::CLTensor> m_InputToForgetWeightsTensor; + std::unique_ptr<arm_compute::CLTensor> m_InputToCellWeightsTensor; + std::unique_ptr<arm_compute::CLTensor> m_InputToOutputWeightsTensor; + std::unique_ptr<arm_compute::CLTensor> m_RecurrentToInputWeightsTensor; + std::unique_ptr<arm_compute::CLTensor> m_RecurrentToForgetWeightsTensor; + std::unique_ptr<arm_compute::CLTensor> m_RecurrentToCellWeightsTensor; + std::unique_ptr<arm_compute::CLTensor> m_RecurrentToOutputWeightsTensor; + std::unique_ptr<arm_compute::CLTensor> m_CellToInputWeightsTensor; + std::unique_ptr<arm_compute::CLTensor> m_CellToForgetWeightsTensor; + std::unique_ptr<arm_compute::CLTensor> m_CellToOutputWeightsTensor; + std::unique_ptr<arm_compute::CLTensor> m_InputGateBiasTensor; + std::unique_ptr<arm_compute::CLTensor> m_ForgetGateBiasTensor; + std::unique_ptr<arm_compute::CLTensor> m_CellBiasTensor; + std::unique_ptr<arm_compute::CLTensor> m_OutputGateBiasTensor; + std::unique_ptr<arm_compute::CLTensor> m_ProjectionWeightsTensor; + std::unique_ptr<arm_compute::CLTensor> m_ProjectionBiasTensor; + + std::unique_ptr<arm_compute::CLTensor> m_ScratchBuffer; + + void FreeUnusedTensors(); +}; + +arm_compute::Status ClLstmFloatWorkloadValidate(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor &descriptor, + const TensorInfo& inputToForgetWeights, + const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, + const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, + const TensorInfo& 
recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, + const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, + const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, + const TensorInfo* projectionWeights, + const TensorInfo* projectionBias, + const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights); +} //namespace armnn diff --git a/src/backends/cl/workloads/ClMergerFloatWorkload.cpp b/src/backends/cl/workloads/ClMergerFloatWorkload.cpp new file mode 100644 index 0000000000..151f1e0ee7 --- /dev/null +++ b/src/backends/cl/workloads/ClMergerFloatWorkload.cpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClMergerFloatWorkload.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +void ClMergerFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClMergerFloatWorkload_Execute"); + ClBaseMergerWorkload::Execute(); +} + +} //namespace armnn + diff --git a/src/backends/cl/workloads/ClMergerFloatWorkload.hpp b/src/backends/cl/workloads/ClMergerFloatWorkload.hpp new file mode 100644 index 0000000000..9782f7a8f3 --- /dev/null +++ b/src/backends/cl/workloads/ClMergerFloatWorkload.hpp @@ -0,0 +1,22 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClBaseMergerWorkload.hpp" + +namespace armnn +{ + +class ClMergerFloatWorkload : public ClBaseMergerWorkload<DataType::Float16, DataType::Float32> +{ +public: + using ClBaseMergerWorkload<DataType::Float16, DataType::Float32>::ClBaseMergerWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn + + diff --git a/src/backends/cl/workloads/ClMergerUint8Workload.cpp b/src/backends/cl/workloads/ClMergerUint8Workload.cpp new file mode 100644 index 0000000000..9d1060d857 --- /dev/null +++ b/src/backends/cl/workloads/ClMergerUint8Workload.cpp @@ -0,0 +1,19 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClMergerUint8Workload.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +void ClMergerUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClMergerUint8Workload_Execute"); + ClBaseMergerWorkload<DataType::QuantisedAsymm8>::Execute(); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClMergerUint8Workload.hpp b/src/backends/cl/workloads/ClMergerUint8Workload.hpp new file mode 100644 index 0000000000..cbfc19a0f2 --- /dev/null +++ b/src/backends/cl/workloads/ClMergerUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClBaseMergerWorkload.hpp" + +namespace armnn +{ + +class ClMergerUint8Workload : public ClBaseMergerWorkload<armnn::DataType::QuantisedAsymm8> +{ +public: + using ClBaseMergerWorkload<armnn::DataType::QuantisedAsymm8>::ClBaseMergerWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn + diff --git a/src/backends/cl/workloads/ClMultiplicationFloatWorkload.cpp b/src/backends/cl/workloads/ClMultiplicationFloatWorkload.cpp new file mode 100644 index 0000000000..d53e149129 --- /dev/null +++ b/src/backends/cl/workloads/ClMultiplicationFloatWorkload.cpp @@ -0,0 +1,60 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClMultiplicationFloatWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0,
+                                                     const TensorInfo& input1,
+                                                     const TensorInfo& output)
+{
+    const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
+    const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
+    const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+    // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
+    // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
+    // ignored for F32 tensors.
+    return arm_compute::CLPixelWiseMultiplication::validate(&aclInput1,
+                                                            &aclInput2,
+                                                            &aclOutput,
+                                                            1.0f,
+                                                            arm_compute::ConvertPolicy::SATURATE,
+                                                            arm_compute::RoundingPolicy::TO_ZERO);
+}
+
+ClMultiplicationFloatWorkload::ClMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor,
+                                                             const WorkloadInfo& info)
+    : FloatWorkload<MultiplicationQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("ClMultiplicationFloatWorkload", 2, 1);
+
+    arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    // Construct and configure the ACL pixel-wise multiplication function.
+    m_PixelWiseMultiplication.configure(&input0,
+                                        &input1,
+                                        &output,
+                                        1.0f,
+                                        arm_compute::ConvertPolicy::SATURATE,
+                                        arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+}
+
+void ClMultiplicationFloatWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClMultiplicationFloatWorkload_Execute");
+
+    // Executes the layer.
+    m_PixelWiseMultiplication.run();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClMultiplicationFloatWorkload.hpp b/src/backends/cl/workloads/ClMultiplicationFloatWorkload.hpp
new file mode 100644
index 0000000000..a793ac64df
--- /dev/null
+++ b/src/backends/cl/workloads/ClMultiplicationFloatWorkload.hpp
@@ -0,0 +1,34 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0,
+                                                     const TensorInfo& input1,
+                                                     const TensorInfo& output);
+
+class ClMultiplicationFloatWorkload : public FloatWorkload<MultiplicationQueueDescriptor>
+{
+public:
+    ClMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    using FloatWorkload<MultiplicationQueueDescriptor>::FloatWorkload;
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLPixelWiseMultiplication m_PixelWiseMultiplication;
+};
+
+} //namespace armnn
+
+
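The Float and Uint8 workload base classes used by these files pin the data types a workload accepts; the multiplication workload above, for example, derives from FloatWorkload and so is only created for FP16/FP32 tensors. A sketch of the assumed alias definitions, simplified from armnn's Workload.hpp (which this change does not modify):

    // Assumed simplification; the real definitions live in src/backends/Workload.hpp.
    namespace armnn
    {
    template <typename QueueDescriptor>
    using FloatWorkload = TypedWorkload<QueueDescriptor,
                                        DataType::Float16,
                                        DataType::Float32>;

    template <typename QueueDescriptor>
    using Uint8Workload = TypedWorkload<QueueDescriptor,
                                        DataType::QuantisedAsymm8>;
    } // namespace armnn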
diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
new file mode 100644
index 0000000000..969c9bb08b
--- /dev/null
+++ b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
@@ -0,0 +1,51 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClNormalizationFloatWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/cl/ClLayerSupport.hpp>
+#include <backends/aclCommon/ArmComputeUtils.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+#include "ClWorkloadUtils.hpp"
+
+using namespace armnn::armcomputetensorutils;
+
+namespace armnn
+{
+
+arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input,
+                                                    const TensorInfo& output,
+                                                    const NormalizationDescriptor& descriptor)
+{
+    const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
+
+    arm_compute::NormalizationLayerInfo layerInfo = BuildArmComputeNormalizationLayerInfo(descriptor);
+
+    return arm_compute::CLNormalizationLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
+}
+
+ClNormalizationFloatWorkload::ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor,
+                                                           const WorkloadInfo& info)
+    : FloatWorkload<NormalizationQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("ClNormalizationFloatWorkload", 1, 1);
+
+    arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    arm_compute::NormalizationLayerInfo normalizationInfo = BuildArmComputeNormalizationLayerInfo(m_Data.m_Parameters);
+
+    m_NormalizationLayer.configure(&input, &output, normalizationInfo);
+}
+
+void ClNormalizationFloatWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClNormalizationFloatWorkload_Execute");
+    m_NormalizationLayer.run();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp
new file mode 100644
index 0000000000..f30be91aaa
--- /dev/null
+++ b/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input,
+                                                    const TensorInfo& output,
+                                                    const NormalizationDescriptor& descriptor);
+
+class ClNormalizationFloatWorkload : public FloatWorkload<NormalizationQueueDescriptor>
+{
+public:
+    ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLNormalizationLayer m_NormalizationLayer;
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClPadWorkload.cpp b/src/backends/cl/workloads/ClPadWorkload.cpp
new file mode 100644
index 0000000000..45dc5e8be7
--- /dev/null
+++ b/src/backends/cl/workloads/ClPadWorkload.cpp
@@ -0,0 +1,63 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClPadWorkload.hpp"
+
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+#include <arm_compute/core/Types.h>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+template <armnn::DataType...
T> +ClPadWorkload<T...>::ClPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info) +: TypedWorkload<PadQueueDescriptor, T...>(descriptor, info) +{ + this->m_Data.ValidateInputsOutputs("ClPadWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor(); + arm_compute::PaddingList padList = static_cast<arm_compute::PaddingList>(descriptor.m_Parameters.m_PadList); + + m_Layer.configure(&input, &output, padList); +} + +template <armnn::DataType... T> +void ClPadWorkload<T...>::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClPadWorkload_Execute"); + m_Layer.run(); +} + +bool ClPadValidate(const TensorInfo& input, + const TensorInfo& output, + const PadDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + arm_compute::PaddingList padList = static_cast<arm_compute::PaddingList>(descriptor.m_PadList); + + const arm_compute::Status aclStatus = arm_compute::CLPadLayer::validate(&aclInputInfo, + &aclOutputInfo, + padList); + + const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); + if (!supported && reasonIfUnsupported) + { + *reasonIfUnsupported = aclStatus.error_description(); + } + + return supported; +} + +} // namespace armnn + +template class armnn::ClPadWorkload<armnn::DataType::Float16, armnn::DataType::Float32>; +template class armnn::ClPadWorkload<armnn::DataType::QuantisedAsymm8>; diff --git a/src/backends/cl/workloads/ClPadWorkload.hpp b/src/backends/cl/workloads/ClPadWorkload.hpp new file mode 100644 index 0000000000..a7ad6670a7 --- /dev/null +++ b/src/backends/cl/workloads/ClPadWorkload.hpp @@ -0,0 +1,32 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/WorkloadData.hpp> +#include <backends/Workload.hpp> +#include <arm_compute/runtime/CL/functions/CLPadLayer.h> + +namespace armnn { + +template <armnn::DataType... dataTypes> +class ClPadWorkload : public TypedWorkload<PadQueueDescriptor, dataTypes...> +{ +public: + ClPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLPadLayer m_Layer; +}; + +bool ClPadValidate(const TensorInfo& input, + const TensorInfo& output, + const PadDescriptor& descriptor, + std::string* reasonIfUnsupported); + +} //namespace armnn + diff --git a/src/backends/cl/workloads/ClPermuteWorkload.cpp b/src/backends/cl/workloads/ClPermuteWorkload.cpp new file mode 100644 index 0000000000..079772dbaf --- /dev/null +++ b/src/backends/cl/workloads/ClPermuteWorkload.cpp @@ -0,0 +1,56 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClPermuteWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+
+#include <arm_compute/core/Error.h>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descriptor)
+{
+    const armnn::PermutationVector& perm = descriptor.m_DimMappings;
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(!perm.IsEqual({ 0U, 3U, 1U, 2U })
+                                    && !perm.IsEqual({ 0U, 2U, 3U, 1U })
+                                    && !perm.IsEqual({ 3U, 2U, 0U, 1U }),
+                                    "Only [0, 3, 1, 2], [0, 2, 3, 1] and [3, 2, 0, 1] permutations are supported");
+
+    return arm_compute::Status{};
+}
+
+template <armnn::DataType... DataTypes>
+ClPermuteWorkload<DataTypes...>::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor,
+                                                   const WorkloadInfo& info)
+    : TypedWorkload<PermuteQueueDescriptor, DataTypes...>(descriptor, info)
+{
+    using armcomputetensorutils::BuildArmComputePermutationVector;
+
+    m_Data.ValidateInputsOutputs(GetName(), 1, 1);
+
+    const arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
+
+    // Configure the permute function.
+    m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings));
+}
+
+template <armnn::DataType... DataTypes>
+void ClPermuteWorkload<DataTypes...>::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL(GetName() + "_Execute");
+    m_PermuteFunction.run();
+}
+
+template class ClPermuteWorkload<DataType::Float16, DataType::Float32>;
+template class ClPermuteWorkload<DataType::QuantisedAsymm8>;
+
+} // namespace armnn
diff --git a/src/backends/cl/workloads/ClPermuteWorkload.hpp b/src/backends/cl/workloads/ClPermuteWorkload.hpp
new file mode 100644
index 0000000000..8ff5707ad6
--- /dev/null
+++ b/src/backends/cl/workloads/ClPermuteWorkload.hpp
@@ -0,0 +1,42 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+#include <backends/WorkloadData.hpp>
+
+#include <armnn/TypesUtils.hpp>
+#include <arm_compute/runtime/CL/functions/CLPermute.h>
+
+#include <string>
+
+namespace armnn
+{
+
+arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descriptor);
+
+template<armnn::DataType... DataTypes>
+class ClPermuteWorkload : public TypedWorkload<PermuteQueueDescriptor, DataTypes...>
+{
+public:
+    static const std::string& GetName()
+    {
+        static const std::string name = std::string("ClPermuteWorkload");
+        return name;
+    }
+
+    ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;
+
+private:
+    using TypedWorkload<PermuteQueueDescriptor, DataTypes...>::m_Data;
+    mutable arm_compute::CLPermute m_PermuteFunction;
+};
+
+using ClPermuteFloatWorkload = ClPermuteWorkload<DataType::Float16, DataType::Float32>;
+using ClPermuteUint8Workload = ClPermuteWorkload<DataType::QuantisedAsymm8>;
+
+} // namespace armnn
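ClPermuteWorkload above is the first of several workloads in this change (ClPadWorkload and ClPooling2dBaseWorkload follow the same scheme) that are templated on the supported DataTypes, defined in the .cpp, and explicitly instantiated at the bottom of that file so the definitions can stay out of the header. Reduced to its skeleton, the pattern looks like this (ExampleWorkload is a hypothetical name, not part of the commit):

    namespace armnn
    {
    // The definition lives in the .cpp rather than the header.
    template <DataType... DataTypes>
    class ExampleWorkload : public TypedWorkload<PermuteQueueDescriptor, DataTypes...>
    {
        // The constructor configures an ACL function; Execute() calls run() on it.
    };

    // One explicit instantiation per supported type set; the header then only
    // needs the template declaration plus aliases such as ClPermuteFloatWorkload.
    template class ExampleWorkload<DataType::Float16, DataType::Float32>;
    template class ExampleWorkload<DataType::QuantisedAsymm8>;
    } // namespace armnn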
diff --git a/src/backends/cl/workloads/ClPooling2dBaseWorkload.cpp b/src/backends/cl/workloads/ClPooling2dBaseWorkload.cpp
new file mode 100644
index 0000000000..98911856fe
--- /dev/null
+++ b/src/backends/cl/workloads/ClPooling2dBaseWorkload.cpp
@@ -0,0 +1,47 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClPooling2dBaseWorkload.hpp"
+#include <backends/cl/ClLayerSupport.hpp>
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/aclCommon/ArmComputeUtils.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input,
+                                                const TensorInfo& output,
+                                                const Pooling2dDescriptor& descriptor)
+{
+    const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+    arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor);
+
+    return arm_compute::CLPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
+}
+
+template <armnn::DataType... dataTypes>
+ClPooling2dBaseWorkload<dataTypes...>::ClPooling2dBaseWorkload(
+    const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, const std::string& name)
+    : TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs(name, 1, 1);
+
+    arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters);
+
+    // Configure the pooling layer.
+    m_PoolingLayer.configure(&input, &output, layerInfo);
+}
+
+template class ClPooling2dBaseWorkload<DataType::Float16, DataType::Float32>;
+template class ClPooling2dBaseWorkload<DataType::QuantisedAsymm8>;
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClPooling2dBaseWorkload.hpp b/src/backends/cl/workloads/ClPooling2dBaseWorkload.hpp
new file mode 100644
index 0000000000..8f9db08ddc
--- /dev/null
+++ b/src/backends/cl/workloads/ClPooling2dBaseWorkload.hpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input,
+                                                const TensorInfo& output,
+                                                const Pooling2dDescriptor& descriptor);
+
+// Base class template providing an implementation of the Pooling2d layer common to all data types.
+template <armnn::DataType... dataTypes>
+class ClPooling2dBaseWorkload : public TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>
+{
+public:
+    using TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>::m_Data;
+
+    ClPooling2dBaseWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info,
+                            const std::string& name);
+
+protected:
+    mutable arm_compute::CLPoolingLayer m_PoolingLayer;
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClPooling2dFloatWorkload.cpp b/src/backends/cl/workloads/ClPooling2dFloatWorkload.cpp
new file mode 100644
index 0000000000..dc9d17f0ae
--- /dev/null
+++ b/src/backends/cl/workloads/ClPooling2dFloatWorkload.cpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT +// + +#include "ClPooling2dFloatWorkload.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClPooling2dFloatWorkload::ClPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : ClPooling2dBaseWorkload<DataType::Float16, DataType::Float32>(descriptor, info, "ClPooling2dFloatWorkload") +{ +} + +void ClPooling2dFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClPooling2dFloatWorkload_Execute"); + m_PoolingLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/cl/workloads/ClPooling2dFloatWorkload.hpp b/src/backends/cl/workloads/ClPooling2dFloatWorkload.hpp new file mode 100644 index 0000000000..ba9294c40f --- /dev/null +++ b/src/backends/cl/workloads/ClPooling2dFloatWorkload.hpp @@ -0,0 +1,22 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> + +#include "ClPooling2dBaseWorkload.hpp" + +namespace armnn +{ +class ClPooling2dFloatWorkload : public ClPooling2dBaseWorkload<DataType::Float16, DataType::Float32> +{ +public: + ClPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +}; + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClPooling2dUint8Workload.cpp b/src/backends/cl/workloads/ClPooling2dUint8Workload.cpp new file mode 100644 index 0000000000..0b4b15f806 --- /dev/null +++ b/src/backends/cl/workloads/ClPooling2dUint8Workload.cpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClPooling2dUint8Workload.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClPooling2dUint8Workload::ClPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : ClPooling2dBaseWorkload<DataType::QuantisedAsymm8>(descriptor, info, "ClPooling2dUint8Workload") +{ +} + +void ClPooling2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClPooling2dUint8Workload_Execute"); + m_PoolingLayer.run(); +} + +} //namespace armnn + + diff --git a/src/backends/cl/workloads/ClPooling2dUint8Workload.hpp b/src/backends/cl/workloads/ClPooling2dUint8Workload.hpp new file mode 100644 index 0000000000..b07f955343 --- /dev/null +++ b/src/backends/cl/workloads/ClPooling2dUint8Workload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> + +#include "ClPooling2dBaseWorkload.hpp" + +namespace armnn +{ + +class ClPooling2dUint8Workload : public ClPooling2dBaseWorkload<DataType::QuantisedAsymm8> +{ +public: + ClPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +}; + +} //namespace armnn + + diff --git a/src/backends/cl/workloads/ClReshapeFloatWorkload.cpp b/src/backends/cl/workloads/ClReshapeFloatWorkload.cpp new file mode 100644 index 0000000000..4da3bbd703 --- /dev/null +++ b/src/backends/cl/workloads/ClReshapeFloatWorkload.cpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClReshapeFloatWorkload.hpp" +#include <backends/cl/ClTensorHandle.hpp> +#include <backends/CpuTensorHandle.hpp> + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClReshapeFloatWorkload::ClReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) + : FloatWorkload<ReshapeQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClReshapeFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output); +} + +void ClReshapeFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClReshapeFloatWorkload_Execute"); + m_Layer.run(); +} + +} //namespace armnn + diff --git a/src/backends/cl/workloads/ClReshapeFloatWorkload.hpp b/src/backends/cl/workloads/ClReshapeFloatWorkload.hpp new file mode 100644 index 0000000000..e5fc20ec8b --- /dev/null +++ b/src/backends/cl/workloads/ClReshapeFloatWorkload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> + +#include <arm_compute/runtime/CL/CLFunctions.h> + +namespace armnn +{ + +class ClReshapeFloatWorkload : public FloatWorkload<ReshapeQueueDescriptor> +{ +public: + ClReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLReshapeLayer m_Layer; +}; + +} //namespace armnn + + diff --git a/src/backends/cl/workloads/ClReshapeUint8Workload.cpp b/src/backends/cl/workloads/ClReshapeUint8Workload.cpp new file mode 100644 index 0000000000..8fbee151fc --- /dev/null +++ b/src/backends/cl/workloads/ClReshapeUint8Workload.cpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClReshapeUint8Workload.hpp" +#include <backends/cl/ClTensorHandle.hpp> +#include <backends/CpuTensorHandle.hpp> + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +ClReshapeUint8Workload::ClReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) + : Uint8Workload<ReshapeQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClReshapeUint8Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(&input, &output); +} + +void ClReshapeUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClReshapeUint8Workload_Execute"); + + m_Layer.run(); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClReshapeUint8Workload.hpp b/src/backends/cl/workloads/ClReshapeUint8Workload.hpp new file mode 100644 index 0000000000..654437a4c1 --- /dev/null +++ b/src/backends/cl/workloads/ClReshapeUint8Workload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> + +#include <arm_compute/runtime/CL/CLFunctions.h> + +namespace armnn +{ + +// Reshape +class ClReshapeUint8Workload : public Uint8Workload<ReshapeQueueDescriptor> +{ +public: + ClReshapeUint8Workload( const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLReshapeLayer m_Layer; +}; + +} //namespace armnn + + diff --git a/src/backends/cl/workloads/ClResizeBilinearFloatWorkload.cpp b/src/backends/cl/workloads/ClResizeBilinearFloatWorkload.cpp new file mode 100644 index 0000000000..499466e959 --- /dev/null +++ b/src/backends/cl/workloads/ClResizeBilinearFloatWorkload.cpp @@ -0,0 +1,38 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClResizeBilinearFloatWorkload.hpp" +#include <backends/cl/ClTensorHandle.hpp> +#include <backends/CpuTensorHandle.hpp> +#include <backends/cl/ClLayerSupport.hpp> +#include <backends/aclCommon/ArmComputeUtils.hpp> + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClResizeBilinearFloatWorkload::ClResizeBilinearFloatWorkload(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload<ResizeBilinearQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClResizeBilinearFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_ResizeBilinearLayer.configure(&input, &output, arm_compute::InterpolationPolicy::BILINEAR, + arm_compute::BorderMode::REPLICATE, arm_compute::PixelValue(0.f), + arm_compute::SamplingPolicy::TOP_LEFT); +}; + +void ClResizeBilinearFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClResizeBilinearFloatWorkload_Execute"); + m_ResizeBilinearLayer.run(); +} + + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClResizeBilinearFloatWorkload.hpp b/src/backends/cl/workloads/ClResizeBilinearFloatWorkload.hpp new file mode 100644 index 0000000000..f29f416907 --- /dev/null +++ b/src/backends/cl/workloads/ClResizeBilinearFloatWorkload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> + +#include <arm_compute/runtime/CL/CLFunctions.h> + +namespace armnn +{ + +class ClResizeBilinearFloatWorkload : public FloatWorkload<ResizeBilinearQueueDescriptor> +{ +public: + ClResizeBilinearFloatWorkload(const ResizeBilinearQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLScale m_ResizeBilinearLayer; +}; + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClSoftmaxBaseWorkload.cpp b/src/backends/cl/workloads/ClSoftmaxBaseWorkload.cpp new file mode 100644 index 0000000000..eb05a19670 --- /dev/null +++ b/src/backends/cl/workloads/ClSoftmaxBaseWorkload.cpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClSoftmaxBaseWorkload.hpp"
+
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+
+#include <arm_compute/runtime/CL/functions/CLSoftmaxLayer.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input,
+                                              const TensorInfo& output)
+{
+    // NOTE: We report 4D Softmax as unsupported until full support is added to ACL
+    if (input.GetShape().GetNumDimensions() >= 4u)
+    {
+        return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "4d softmax is not supported");
+    }
+
+    const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+    return arm_compute::CLSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo);
+}
+
+} // namespace armnn
diff --git a/src/backends/cl/workloads/ClSoftmaxBaseWorkload.hpp b/src/backends/cl/workloads/ClSoftmaxBaseWorkload.hpp
new file mode 100644
index 0000000000..b800056cdf
--- /dev/null
+++ b/src/backends/cl/workloads/ClSoftmaxBaseWorkload.hpp
@@ -0,0 +1,17 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/Tensor.hpp>
+#include <arm_compute/core/Error.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input,
+                                              const TensorInfo& output);
+
+} // namespace armnn
diff --git a/src/backends/cl/workloads/ClSoftmaxFloatWorkload.cpp b/src/backends/cl/workloads/ClSoftmaxFloatWorkload.cpp
new file mode 100644
index 0000000000..606005659f
--- /dev/null
+++ b/src/backends/cl/workloads/ClSoftmaxFloatWorkload.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClSoftmaxFloatWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+ClSoftmaxFloatWorkload::ClSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
+                                               std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+    : FloatWorkload<SoftmaxQueueDescriptor>(descriptor, info)
+    , m_SoftmaxLayer(memoryManager)
+{
+    m_Data.ValidateInputsOutputs("ClSoftmaxFloatWorkload", 1, 1);
+
+    arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta);
+}
+
+void ClSoftmaxFloatWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSoftmaxFloatWorkload_Execute");
+    m_SoftmaxLayer.run();
+}
+
+} //namespace armnn
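The validate helper above follows the pattern used across these workloads: translate the armnn TensorInfos into arm_compute::TensorInfo and delegate to the ACL function's static validate(). A caller-side sketch of querying it before constructing the workload (the shapes and surrounding code are illustrative assumptions, not part of this diff):

    // Query support first; a 4D input would be rejected by the check above.
    armnn::TensorInfo inputInfo(armnn::TensorShape({2, 10}), armnn::DataType::Float32);
    armnn::TensorInfo outputInfo(armnn::TensorShape({2, 10}), armnn::DataType::Float32);

    const arm_compute::Status status = armnn::ClSoftmaxWorkloadValidate(inputInfo, outputInfo);
    if (status.error_code() != arm_compute::ErrorCode::OK)
    {
        // status.error_description() carries the reason,
        // e.g. "4d softmax is not supported".
    }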
diff --git a/src/backends/cl/workloads/ClSoftmaxFloatWorkload.hpp b/src/backends/cl/workloads/ClSoftmaxFloatWorkload.hpp
new file mode 100644
index 0000000000..b400b3c7ea
--- /dev/null
+++ b/src/backends/cl/workloads/ClSoftmaxFloatWorkload.hpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+#include <arm_compute/runtime/MemoryManagerOnDemand.h>
+
+#include <memory>
+
+namespace armnn
+{
+
+class ClSoftmaxFloatWorkload : public FloatWorkload<SoftmaxQueueDescriptor>
+{
+public:
+    ClSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
+                           std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer;
+};
+
+} //namespace armnn
+
diff --git a/src/backends/cl/workloads/ClSoftmaxUint8Workload.cpp b/src/backends/cl/workloads/ClSoftmaxUint8Workload.cpp
new file mode 100644
index 0000000000..7e0589e89f
--- /dev/null
+++ b/src/backends/cl/workloads/ClSoftmaxUint8Workload.cpp
@@ -0,0 +1,43 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClSoftmaxUint8Workload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+ClSoftmaxUint8Workload::ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
+                                               std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+    : Uint8Workload<SoftmaxQueueDescriptor>(descriptor, info)
+    , m_SoftmaxLayer(memoryManager)
+{
+    m_Data.ValidateInputsOutputs("ClSoftmaxUint8Workload", 1, 1);
+
+    arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    const auto outputQuantization = output.info()->quantization_info();
+
+    if ((outputQuantization.scale != (1.0f / 256.0f)) || (outputQuantization.offset != 0))
+    {
+        throw InvalidArgumentException(
+            "Invalid quantization for output. Only scale = 1.0f / 256.0f and offset = 0 supported");
+    }
+
+    m_SoftmaxLayer.configure(&input, &output, descriptor.m_Parameters.m_Beta);
+}
+
+void ClSoftmaxUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSoftmaxUint8Workload_Execute");
+
+    m_SoftmaxLayer.run();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClSoftmaxUint8Workload.hpp b/src/backends/cl/workloads/ClSoftmaxUint8Workload.hpp
new file mode 100644
index 0000000000..4786faf60b
--- /dev/null
+++ b/src/backends/cl/workloads/ClSoftmaxUint8Workload.hpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+#include <arm_compute/runtime/MemoryManagerOnDemand.h>
+
+#include <memory>
+
+namespace armnn
+{
+// Softmax
+class ClSoftmaxUint8Workload : public Uint8Workload<SoftmaxQueueDescriptor>
+{
+public:
+    ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
+                           std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer;
+};
+
+} //namespace armnn
+
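ClSoftmaxUint8Workload only accepts outputs quantised with scale 1/256 and offset 0, matching the fixed [0, 1) output range of softmax. A sketch of output tensor settings that would pass the constructor's check (the shape and variable name are illustrative only):

    armnn::TensorInfo outputInfo(armnn::TensorShape({1, 10}), armnn::DataType::QuantisedAsymm8);
    outputInfo.SetQuantizationScale(1.0f / 256.0f); // required: scale = 1/256
    outputInfo.SetQuantizationOffset(0);            // required: offset = 0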
diff --git a/src/backends/cl/workloads/ClSplitterFloatWorkload.cpp b/src/backends/cl/workloads/ClSplitterFloatWorkload.cpp
new file mode 100644
index 0000000000..5fd634bdb6
--- /dev/null
+++ b/src/backends/cl/workloads/ClSplitterFloatWorkload.cpp
@@ -0,0 +1,19 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClSplitterFloatWorkload.hpp"
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+void ClSplitterFloatWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSplitterFloatWorkload_Execute");
+    ClBaseSplitterWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClSplitterFloatWorkload.hpp b/src/backends/cl/workloads/ClSplitterFloatWorkload.hpp
new file mode 100644
index 0000000000..a0b5846f8e
--- /dev/null
+++ b/src/backends/cl/workloads/ClSplitterFloatWorkload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ClBaseSplitterWorkload.hpp"
+
+namespace armnn
+{
+
+class ClSplitterFloatWorkload : public ClBaseSplitterWorkload<DataType::Float16, DataType::Float32>
+{
+public:
+    using ClBaseSplitterWorkload<DataType::Float16, DataType::Float32>::ClBaseSplitterWorkload;
+    virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClSplitterUint8Workload.cpp b/src/backends/cl/workloads/ClSplitterUint8Workload.cpp
new file mode 100644
index 0000000000..50a251ada7
--- /dev/null
+++ b/src/backends/cl/workloads/ClSplitterUint8Workload.cpp
@@ -0,0 +1,19 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClSplitterUint8Workload.hpp"
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+void ClSplitterUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSplitterUint8Workload_Execute");
+    ClBaseSplitterWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClSplitterUint8Workload.hpp b/src/backends/cl/workloads/ClSplitterUint8Workload.hpp
new file mode 100644
index 0000000000..19e8be5034
--- /dev/null
+++ b/src/backends/cl/workloads/ClSplitterUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ClBaseSplitterWorkload.hpp"
+
+namespace armnn
+{
+class ClSplitterUint8Workload : public ClBaseSplitterWorkload<DataType::QuantisedAsymm8>
+{
+public:
+    using ClBaseSplitterWorkload<DataType::QuantisedAsymm8>::ClBaseSplitterWorkload;
+    virtual void Execute() const override;
+};
+} //namespace armnn
+
+
+
diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.cpp b/src/backends/cl/workloads/ClSubtractionWorkload.cpp
new file mode 100644
index 0000000000..37b334d94e
--- /dev/null
+++ b/src/backends/cl/workloads/ClSubtractionWorkload.cpp
@@ -0,0 +1,66 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClSubtractionWorkload.hpp"
+
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
+
+template <armnn::DataType... T>
+ClSubtractionWorkload<T...>::ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor,
+                                                   const WorkloadInfo& info)
+    : TypedWorkload<SubtractionQueueDescriptor, T...>(descriptor, info)
+{
+    this->m_Data.ValidateInputsOutputs("ClSubtractionWorkload", 2, 1);
+
+    arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
+    m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy);
+}
+
+template <armnn::DataType... T>
+void ClSubtractionWorkload<T...>::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSubtractionWorkload_Execute");
+    m_Layer.run();
+}
+
+bool ClSubtractionValidate(const TensorInfo& input0,
+                           const TensorInfo& input1,
+                           const TensorInfo& output,
+                           std::string* reasonIfUnsupported)
+{
+    const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
+    const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
+    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+    const arm_compute::Status aclStatus = arm_compute::CLArithmeticSubtraction::validate(&aclInput0Info,
+                                                                                         &aclInput1Info,
+                                                                                         &aclOutputInfo,
+                                                                                         g_AclConvertPolicy);
+
+    const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
+    if (!supported && reasonIfUnsupported)
+    {
+        *reasonIfUnsupported = aclStatus.error_description();
+    }
+
+    return supported;
+}
+
+} //namespace armnn
+
+template class armnn::ClSubtractionWorkload<armnn::DataType::Float16, armnn::DataType::Float32>;
+template class armnn::ClSubtractionWorkload<armnn::DataType::QuantisedAsymm8>;
diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.hpp b/src/backends/cl/workloads/ClSubtractionWorkload.hpp
new file mode 100644
index 0000000000..67b219b09d
--- /dev/null
+++ b/src/backends/cl/workloads/ClSubtractionWorkload.hpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+template <armnn::DataType... dataTypes>
+class ClSubtractionWorkload : public TypedWorkload<SubtractionQueueDescriptor, dataTypes...>
+{
+public:
+    ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLArithmeticSubtraction m_Layer;
+};
+
+bool ClSubtractionValidate(const TensorInfo& input0,
+                           const TensorInfo& input1,
+                           const TensorInfo& output,
+                           std::string* reasonIfUnsupported);
+} //namespace armnn
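ClSubtractionWorkload keeps its member definitions in the .cpp file, so the two explicit instantiations at the end of ClSubtractionWorkload.cpp are what make the float and uint8 variants visible to the linker. A deliberately simplified stand-alone sketch of that technique (the names below are illustrative, not armnn types):

    #include <cstddef>

    enum class DataType { Float16, Float32, QuantisedAsymm8 };

    template <DataType... Ts>
    struct TypedThing
    {
        std::size_t Count() const; // defined out of line, as in the workload .cpp
    };

    template <DataType... Ts>
    std::size_t TypedThing<Ts...>::Count() const
    {
        return sizeof...(Ts);
    }

    // Without these lines, the out-of-line definitions would be invisible to
    // other translation units and callers would fail to link.
    template struct TypedThing<DataType::Float16, DataType::Float32>;
    template struct TypedThing<DataType::QuantisedAsymm8>;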
diff --git a/src/backends/cl/workloads/ClWorkloadUtils.hpp b/src/backends/cl/workloads/ClWorkloadUtils.hpp
new file mode 100644
index 0000000000..3a8ff00bb6
--- /dev/null
+++ b/src/backends/cl/workloads/ClWorkloadUtils.hpp
@@ -0,0 +1,64 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "OpenClTimer.hpp"
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include <boost/assert.hpp>
+
+#include <Half.hpp>
+
+#define ARMNN_SCOPED_PROFILING_EVENT_CL(name) \
+    ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::GpuAcc, \
+                                                  name, \
+                                                  armnn::OpenClTimer(), \
+                                                  armnn::WallClockTimer())
+
+namespace armnn
+{
+
+template <typename T>
+void CopyArmComputeClTensorData(arm_compute::CLTensor& dstTensor, const T* srcData)
+{
+    {
+        ARMNN_SCOPED_PROFILING_EVENT_CL("MapClTensorForWriting");
+        dstTensor.map(true);
+    }
+
+    {
+        ARMNN_SCOPED_PROFILING_EVENT_CL("CopyToClTensor");
+        armcomputetensorutils::CopyArmComputeITensorData<T>(srcData, dstTensor);
+    }
+
+    dstTensor.unmap();
+}
+
+inline void InitializeArmComputeClTensorData(arm_compute::CLTensor& clTensor,
+                                             const ConstCpuTensorHandle* handle)
+{
+    BOOST_ASSERT(handle);
+
+    armcomputetensorutils::InitialiseArmComputeTensorEmpty(clTensor);
+    switch (handle->GetTensorInfo().GetDataType())
+    {
+        case DataType::Float16:
+            CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<armnn::Half>());
+            break;
+        case DataType::Float32:
+            CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<float>());
+            break;
+        case DataType::QuantisedAsymm8:
+            CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<uint8_t>());
+            break;
+        case DataType::Signed32:
+            CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int32_t>());
+            break;
+        default:
+            BOOST_ASSERT_MSG(false, "Unexpected tensor type.");
+    }
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp
new file mode 100644
index 0000000000..3329f42e08
--- /dev/null
+++ b/src/backends/cl/workloads/ClWorkloads.hpp
@@ -0,0 +1,41 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+#include "ClActivationFloatWorkload.hpp"
+#include "ClActivationUint8Workload.hpp"
+#include "ClAdditionWorkload.hpp"
+#include "ClBaseConstantWorkload.hpp"
+#include "ClBaseMergerWorkload.hpp"
+#include "ClBatchNormalizationFloatWorkload.hpp"
+#include "ClConstantFloatWorkload.hpp"
+#include "ClConstantUint8Workload.hpp"
+#include "ClConvolution2dFloatWorkload.hpp"
+#include "ClConvolution2dUint8Workload.hpp"
+#include "ClDepthwiseConvolutionFloatWorkload.hpp"
+#include "ClDepthwiseConvolutionUint8Workload.hpp"
+#include "ClDivisionFloatWorkload.hpp"
+#include "ClFloorFloatWorkload.hpp"
+#include "ClFullyConnectedWorkload.hpp"
+#include "ClL2NormalizationFloatWorkload.hpp"
+#include "ClLstmFloatWorkload.hpp"
+#include "ClMergerFloatWorkload.hpp"
+#include "ClMergerUint8Workload.hpp"
+#include "ClMultiplicationFloatWorkload.hpp"
+#include "ClNormalizationFloatWorkload.hpp"
+#include "ClPermuteWorkload.hpp"
+#include "ClPadWorkload.hpp"
+#include "ClPooling2dFloatWorkload.hpp"
+#include "ClPooling2dUint8Workload.hpp"
+#include "ClReshapeFloatWorkload.hpp"
+#include "ClReshapeUint8Workload.hpp"
+#include "ClResizeBilinearFloatWorkload.hpp"
+#include "ClSoftmaxFloatWorkload.hpp"
+#include "ClSoftmaxUint8Workload.hpp"
+#include "ClSplitterFloatWorkload.hpp"
+#include "ClSplitterUint8Workload.hpp"
+#include "ClSubtractionWorkload.hpp"
+#include "ClConvertFp16ToFp32Workload.hpp"
+#include "ClConvertFp32ToFp16Workload.hpp"
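For orientation, a sketch of how the ClWorkloadUtils.hpp helpers are typically used from a workload constructor that owns constant data: BuildArmComputeTensor (from ArmComputeTensorUtils.hpp) describes the CL tensor from the armnn TensorInfo, and InitializeArmComputeClTensorData then allocates it and uploads the host data through the map/copy/unmap sequence in CopyArmComputeClTensorData. The m_Weight member is a hypothetical stand-in for whichever ConstCpuTensorHandle the concrete descriptor carries:

    // Hypothetical constructor body of a workload with constant weights.
    arm_compute::CLTensor weightsTensor;
    armcomputetensorutils::BuildArmComputeTensor(weightsTensor, m_Data.m_Weight->GetTensorInfo());
    InitializeArmComputeClTensorData(weightsTensor, m_Data.m_Weight); // allocates, maps, copies, unmaps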