Diffstat (limited to 'src/backends/cl')
128 files changed, 0 insertions, 12104 deletions
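The deleted files below make up Arm NN's OpenCL backend ("GpuAcc"): backend registration, OpenCL context and tuner control, layer-support queries, tensor handles, and the workload factory. For orientation, the following is a minimal sketch of how an application would have driven this backend through the public Arm NN API of this period. The backend-option names ("KernelProfilingEnabled", "TuningLevel", "TuningFile") are the ones parsed by ClBackendContext.cpp below; the network construction and the tuning-file path are illustrative only, not taken from this diff.

    #include <armnn/ArmNN.hpp>
    #include <armnn/BackendOptions.hpp>

    int main()
    {
        using namespace armnn;

        // Runtime options consumed by ClBackendContext: enable OpenCL kernel
        // profiling and configure the CLTuner (see ParseOptions below).
        IRuntime::CreationOptions options;
        options.m_EnableGpuProfiling = true;
        options.m_BackendOptions.emplace_back(BackendOptions{"GpuAcc",
            {
                {"KernelProfilingEnabled", true},
                {"TuningLevel", 2},               // None=0, Rapid=1, Normal=2, Exhaustive=3
                {"TuningFile", "gpu_tuning.csv"}  // illustrative path
            }});
        IRuntimePtr runtime = IRuntime::Create(options);

        // Build a network (elided), then optimize it for GpuAcc, falling back
        // to the CPU reference backend for unsupported layers.
        INetworkPtr network = INetwork::Create();
        // ... add input, layer, and output nodes here ...
        IOptimizedNetworkPtr optNet = Optimize(*network, {"GpuAcc", "CpuRef"},
                                               runtime->GetDeviceSpec());

        NetworkId networkId = 0;
        runtime->LoadNetwork(networkId, std::move(optNet));
        return 0;
    }
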
diff --git a/src/backends/cl/CMakeLists.txt b/src/backends/cl/CMakeLists.txt deleted file mode 100644 index 64b04636f7..0000000000 --- a/src/backends/cl/CMakeLists.txt +++ /dev/null @@ -1,46 +0,0 @@ -# -# Copyright © 2017 Arm Ltd. All rights reserved. -# SPDX-License-Identifier: MIT -# - -if(ARMCOMPUTECL) - list(APPEND armnnClBackend_sources - ClBackend.cpp - ClBackend.hpp - ClBackendContext.cpp - ClBackendContext.hpp - ClBackendId.hpp - ClContextControl.cpp - ClContextControl.hpp - ClLayerSupport.cpp - ClLayerSupport.hpp - ClRegistryInitializer.cpp - ClTensorHandle.hpp - ClTensorHandleFactory.cpp - ClTensorHandleFactory.hpp - ClWorkloadFactory.cpp - ClWorkloadFactory.hpp - OpenClTimer.cpp - OpenClTimer.hpp - ) - - add_subdirectory(workloads) - - if(BUILD_UNIT_TESTS) - add_subdirectory(test) - endif() - -else() - list(APPEND armnnClBackend_sources - ClBackendId.hpp - ClLayerSupport.cpp - ClLayerSupport.hpp - ) -endif() - -add_library(armnnClBackend OBJECT ${armnnClBackend_sources}) -target_include_directories(armnnClBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) -target_include_directories(armnnClBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) -target_include_directories(armnnClBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/backends) -target_include_directories(armnnClBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/profiling) - diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp deleted file mode 100644 index f662754693..0000000000 --- a/src/backends/cl/ClBackend.cpp +++ /dev/null @@ -1,103 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClBackend.hpp" -#include "ClBackendId.hpp" -#include "ClWorkloadFactory.hpp" -#include "ClBackendContext.hpp" -#include "ClLayerSupport.hpp" -#include "ClTensorHandleFactory.hpp" - -#include <armnn/BackendRegistry.hpp> - -#include <aclCommon/BaseMemoryManager.hpp> - -#include <armnn/backends/IBackendContext.hpp> -#include <armnn/backends/IMemoryManager.hpp> - -#include <Optimizer.hpp> - -#include <arm_compute/runtime/CL/CLBufferAllocator.h> - -#include <boost/polymorphic_pointer_cast.hpp> - -namespace armnn -{ - -const BackendId& ClBackend::GetIdStatic() -{ - static const BackendId s_Id{ClBackendId()}; - return s_Id; -} - -IBackendInternal::IMemoryManagerUniquePtr ClBackend::CreateMemoryManager() const -{ - return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); -} - -IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory( - const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const -{ - return std::make_unique<ClWorkloadFactory>( - boost::polymorphic_pointer_downcast<ClMemoryManager>(memoryManager)); -} - -IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory( - TensorHandleFactoryRegistry& registry) const -{ - auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); - - registry.RegisterMemoryManager(memoryManager); - - return std::make_unique<ClWorkloadFactory>( - boost::polymorphic_pointer_downcast<ClMemoryManager>(memoryManager)); -} - -std::vector<ITensorHandleFactory::FactoryId> ClBackend::GetHandleFactoryPreferences() const -{ - return std::vector<ITensorHandleFactory::FactoryId> {ClTensorHandleFactory::GetIdStatic()}; -} - -void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) -{ - auto mgr = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); - - 
registry.RegisterMemoryManager(mgr); - registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(mgr)); -} - -IBackendInternal::IBackendContextPtr -ClBackend::CreateBackendContext(const IRuntime::CreationOptions& options) const -{ - return IBackendContextPtr{new ClBackendContext{options}}; -} - -IBackendInternal::IBackendProfilingContextPtr ClBackend::CreateBackendProfilingContext( - const IRuntime::CreationOptions&, IBackendProfilingPtr&) -{ - return IBackendProfilingContextPtr{}; -} - -IBackendInternal::Optimizations ClBackend::GetOptimizations() const -{ - return Optimizations{}; -} - -IBackendInternal::ILayerSupportSharedPtr ClBackend::GetLayerSupport() const -{ - static ILayerSupportSharedPtr layerSupport{new ClLayerSupport}; - return layerSupport; -} - -OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph) const -{ - OptimizationViews optimizationViews; - - optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph)); - - return optimizationViews; -} - -} // namespace armnn diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp deleted file mode 100644 index e85c616505..0000000000 --- a/src/backends/cl/ClBackend.hpp +++ /dev/null @@ -1,43 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include <armnn/backends/IBackendInternal.hpp> - -namespace armnn -{ - -class ClBackend : public IBackendInternal -{ -public: - ClBackend() = default; - ~ClBackend() = default; - - static const BackendId& GetIdStatic(); - const BackendId& GetId() const override { return GetIdStatic(); } - - IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override; - - IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory( - const IBackendInternal::IMemoryManagerSharedPtr& memoryManager = nullptr) const override; - - IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory( - TensorHandleFactoryRegistry& registry) const override; - - std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override; - - void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override; - - IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override; - IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext( - const IRuntime::CreationOptions&, IBackendProfilingPtr& backendProfiling) override; - - IBackendInternal::Optimizations GetOptimizations() const override; - IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override; - - OptimizationViews OptimizeSubgraphView(const SubgraphView& subgraph) const override; -}; - -} // namespace armnn diff --git a/src/backends/cl/ClBackendContext.cpp b/src/backends/cl/ClBackendContext.cpp deleted file mode 100644 index 068e2958af..0000000000 --- a/src/backends/cl/ClBackendContext.cpp +++ /dev/null @@ -1,292 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClBackendContext.hpp" -#include "ClContextControl.hpp" - -#include <armnn/Logging.hpp> - -#include <arm_compute/core/CL/OpenCL.h> -#include <arm_compute/core/CL/CLKernelLibrary.h> -#include <arm_compute/runtime/CL/CLScheduler.h> -#include <arm_compute/runtime/CL/CLTunerTypes.h> - -#include <boost/polymorphic_cast.hpp> - -namespace armnn -{ - -struct ClBackendContext::ClContextControlWrapper -{ - ClContextControlWrapper(arm_compute::CLTuner* tuner, - bool profilingEnabled) - : m_ClContextControl(tuner, profilingEnabled) - {} - - bool Sync() - { - if (arm_compute::CLScheduler::get().context()() != NULL) - { - // Waits for all queued CL requests to finish before unloading the network they may be using. - try - { - // Coverity fix: arm_compute::CLScheduler::sync() may throw an exception of type cl::Error. - arm_compute::CLScheduler::get().sync(); - } - catch (const cl::Error&) - { - ARMNN_LOG(warning) << "Runtime::UnloadNetwork(): an error occurred while waiting for " - "the queued CL requests to finish"; - return false; - } - } - - return true; - } - - void ClearClCache() - { - if (arm_compute::CLScheduler::get().context()() != NULL) - { - // There are no loaded networks left, so clear the CL cache to free up memory - m_ClContextControl.ClearClCache(); - } - } - - ClContextControl m_ClContextControl; -}; - -std::string LowerString(std::string value) -{ - std::transform(value.begin(), value.end(), value.begin(), - [](unsigned char c){ return std::tolower(c); }); - - return value; -} - -enum class TuningLevel -{ - None, - Rapid, - Normal, - Exhaustive -}; - - -TuningLevel ParseTuningLevel(const BackendOptions::Var& value, TuningLevel defaultValue) -{ - if (value.IsInt()) - { - int v = value.AsInt(); - if (v > static_cast<int>(TuningLevel::Exhaustive) || - v < static_cast<int>(TuningLevel::None)) - { - ARMNN_LOG(warning) << "Invalid GpuAcc tuning level ("<< v << ") selected. " - "Using default(" << static_cast<int>(defaultValue) << ")"; - } else - { - return static_cast<TuningLevel>(v); - } - } - return defaultValue; -} - -bool ParseBoolean(const BackendOptions::Var& value, bool defaultValue) -{ - if (value.IsBool()) - { - return value.AsBool(); - } - - return defaultValue; -} - -std::string ParseFile(const BackendOptions::Var& value, std::string defaultValue) -{ - if (value.IsString()) - { - return value.AsString(); - } - return defaultValue; -} - -template <typename F> -void ParseOptions(const std::vector<BackendOptions>& options, BackendId backend, F f) -{ - for (auto optionsGroup : options) - { - if (optionsGroup.GetBackendId() == backend) - { - for (size_t i=0; i < optionsGroup.GetOptionCount(); i++) - { - const BackendOptions::BackendOption option = optionsGroup.GetOption(i); - f(option.GetName(), option.GetValue()); - } - } - } -} - -void ConfigureTuner(arm_compute::CLTuner &tuner, TuningLevel level) -{ - tuner.set_tune_new_kernels(true); // Turn on tuning initially. - - switch (level) - { - case TuningLevel::Rapid: - tuner.set_tuner_mode(arm_compute::CLTunerMode::RAPID); - break; - case TuningLevel::Normal: - tuner.set_tuner_mode(arm_compute::CLTunerMode::NORMAL); - break; - case TuningLevel::Exhaustive: - tuner.set_tuner_mode(arm_compute::CLTunerMode::EXHAUSTIVE); - break; - case TuningLevel::None: - default: - tuner.set_tune_new_kernels(false); // Turn off tuning. Set to "use" only mode.
- break; - } -} - -ClBackendContext::ClBackendContext(const IRuntime::CreationOptions& options) - : IBackendContext(options) - , m_TuningFile() -{ - bool kernelProfiling = options.m_EnableGpuProfiling; - - arm_compute::CLTuner* tuner = nullptr; - bool useLegacyTunerAPI = options.m_GpuAccTunedParameters.get() != nullptr; - if (useLegacyTunerAPI) - { - auto clTunerParams = boost::polymorphic_downcast<ClTunedParameters*>( - options.m_GpuAccTunedParameters.get()); - tuner = &clTunerParams->m_Tuner; - - if (tuner) - { - auto ConvertTuningLevel = [](IGpuAccTunedParameters::TuningLevel level, - armnn::IGpuAccTunedParameters::Mode mode) - { - if (mode == armnn::IGpuAccTunedParameters::Mode::UseTunedParameters) - { - return TuningLevel::None; - } - - switch(level) - { - case IGpuAccTunedParameters::TuningLevel::Rapid: - return TuningLevel::Rapid; - case IGpuAccTunedParameters::TuningLevel::Normal: - return TuningLevel::Normal; - case IGpuAccTunedParameters::TuningLevel::Exhaustive: - return TuningLevel::Exhaustive; - default: - { - BOOST_ASSERT_MSG(false, "Tuning level not recognised."); - return TuningLevel::None; - } - } - }; - - TuningLevel tuningLevel = ConvertTuningLevel(clTunerParams->m_TuningLevel, clTunerParams->m_Mode); - ConfigureTuner(*tuner, tuningLevel); - } - } - else //New backend options API - { - const TuningLevel defaultTuningLevel = TuningLevel::None; - auto tuningLevel = defaultTuningLevel; - - ParseOptions(options.m_BackendOptions, "GpuAcc", [&](std::string name, const BackendOptions::Var& value) - { - if (name == "KernelProfilingEnabled") - { - kernelProfiling |= ParseBoolean(value, false); - } else if (name == "TuningFile") - { - m_TuningFile = ParseFile(value, ""); - } else if (name == "TuningLevel") - { - tuningLevel = ParseTuningLevel(value, defaultTuningLevel); - } - }); - - // Create the tuner, in tuning mode initially. - m_Tuner = std::make_unique<arm_compute::CLTuner>(true); - - ConfigureTuner(*(m_Tuner.get()), tuningLevel); - - if (!m_TuningFile.empty()) - { - try - { - m_Tuner->load_from_file(m_TuningFile.c_str()); - } catch (const std::exception& e) - { - ARMNN_LOG(warning) << "Could not load GpuAcc tuner data file."; - } - - tuner = m_Tuner.get(); - } - } - - m_ClContextControlWrapper = std::make_unique<ClContextControlWrapper>( - tuner, - kernelProfiling - ); -} - -bool ClBackendContext::BeforeLoadNetwork(NetworkId) -{ - return true; -} - -bool ClBackendContext::AfterLoadNetwork(NetworkId networkId) -{ - { - std::lock_guard<std::mutex> lockGuard(m_Mutex); - m_NetworkIds.insert(networkId); - } - return true; -} - -bool ClBackendContext::BeforeUnloadNetwork(NetworkId) -{ - return m_ClContextControlWrapper->Sync(); -} - -bool ClBackendContext::AfterUnloadNetwork(NetworkId networkId) -{ - bool clearCache = false; - { - std::lock_guard<std::mutex> lockGuard(m_Mutex); - m_NetworkIds.erase(networkId); - clearCache = m_NetworkIds.empty(); - } - - if (clearCache) - { - m_ClContextControlWrapper->ClearClCache(); - } - - return true; -} - -ClBackendContext::~ClBackendContext() -{ - if (m_Tuner && !m_TuningFile.empty()) - { - try - { - m_Tuner->save_to_file(m_TuningFile.c_str()); - } - catch(const std::exception& e) - { - ARMNN_LOG(warning) << "Could not save GpuAcc tuner data to file " << m_TuningFile; - } - } -} - -} // namespace armnn
\ No newline at end of file diff --git a/src/backends/cl/ClBackendContext.hpp b/src/backends/cl/ClBackendContext.hpp deleted file mode 100644 index bcac0d245e..0000000000 --- a/src/backends/cl/ClBackendContext.hpp +++ /dev/null @@ -1,40 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include <armnn/backends/IBackendContext.hpp> -#include <unordered_set> -#include <mutex> - -#include <arm_compute/runtime/CL/CLTuner.h> - -namespace armnn -{ - -class ClBackendContext : public IBackendContext -{ -public: - ClBackendContext(const IRuntime::CreationOptions& options); - - bool BeforeLoadNetwork(NetworkId networkId) override; - bool AfterLoadNetwork(NetworkId networkId) override; - - bool BeforeUnloadNetwork(NetworkId networkId) override; - bool AfterUnloadNetwork(NetworkId networkId) override; - - ~ClBackendContext() override; - -private: - std::mutex m_Mutex; - struct ClContextControlWrapper; - std::unique_ptr<ClContextControlWrapper> m_ClContextControlWrapper; - - std::unordered_set<NetworkId> m_NetworkIds; - - std::unique_ptr<arm_compute::CLTuner> m_Tuner; - std::string m_TuningFile; -}; - -} // namespace armnn
\ No newline at end of file diff --git a/src/backends/cl/ClBackendId.hpp b/src/backends/cl/ClBackendId.hpp deleted file mode 100644 index 3f8fec2a69..0000000000 --- a/src/backends/cl/ClBackendId.hpp +++ /dev/null @@ -1,12 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -namespace armnn -{ - -constexpr const char * ClBackendId() { return "GpuAcc"; } - -} // namespace armnn
diff --git a/src/backends/cl/ClContextControl.cpp b/src/backends/cl/ClContextControl.cpp deleted file mode 100644 index 72c8e9fe45..0000000000 --- a/src/backends/cl/ClContextControl.cpp +++ /dev/null @@ -1,218 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClContextControl.hpp" - -#include <armnn/Exceptions.hpp> - -#include <LeakChecking.hpp> - -#include <arm_compute/core/CL/CLKernelLibrary.h> -#include <arm_compute/runtime/CL/CLScheduler.h> - -#include <boost/assert.hpp> -#include <boost/format.hpp> -#include <boost/polymorphic_cast.hpp> -#include <boost/core/ignore_unused.hpp> - -namespace cl -{ -class Context; -class CommandQueue; -class Device; -} - -namespace armnn -{ - -ClContextControl::ClContextControl(arm_compute::CLTuner *tuner, - bool profilingEnabled) - : m_Tuner(tuner) - , m_ProfilingEnabled(profilingEnabled) -{ - // Ignore m_ProfilingEnabled if unused to avoid compiling problems when ArmCompute is disabled. - boost::ignore_unused(m_ProfilingEnabled); - - try - { - std::vector<cl::Platform> platforms; - cl::Platform::get(&platforms); - - // Selects the first platform as the default. - cl::Platform::setDefault(platforms[0]); - - std::vector<cl::Device> devices; - platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices); - - // Selects the first device as the default. - cl::Device::setDefault(devices[0]); - } - catch (const cl::Error& clError) - { - throw ClRuntimeUnavailableException(boost::str(boost::format( - "Could not initialize the CL runtime. Error description: %1%. CL error code: %2%" - ) % clError.what() % clError.err())); - } - - // Removes the use of global CL context. - cl::Context::setDefault(cl::Context{}); - BOOST_ASSERT(cl::Context::getDefault()() == NULL); - - // Removes the use of global CL command queue. - cl::CommandQueue::setDefault(cl::CommandQueue{}); - BOOST_ASSERT(cl::CommandQueue::getDefault()() == NULL); - - // Always load the OpenCL runtime. - LoadOpenClRuntime(); -} - -ClContextControl::~ClContextControl() -{ - // Load the OpenCL runtime without the tuned parameters to free the memory for them. - try - { - UnloadOpenClRuntime(); - } - catch (const cl::Error& clError) - { - // This should not happen, it is ignored if it does. - - // Coverity fix: BOOST_LOG_TRIVIAL (previously used here to report the error) may throw an - // exception of type std::length_error. - // Using stderr instead in this context as there is no point in nesting try-catch blocks here. - std::cerr << "A CL error occurred unloading the runtime tuner parameters: " - << clError.what() << ". CL error code is: " << clError.err() << std::endl; - } -} - -void ClContextControl::LoadOpenClRuntime() -{ - DoLoadOpenClRuntime(true); -} - -void ClContextControl::UnloadOpenClRuntime() -{ - DoLoadOpenClRuntime(false); -} - -void ClContextControl::DoLoadOpenClRuntime(bool updateTunedParameters) -{ - cl::Device device = cl::Device::getDefault(); - cl::Context context; - cl::CommandQueue commandQueue; - - if (arm_compute::CLScheduler::get().is_initialised() && arm_compute::CLScheduler::get().context()() != NULL) - { - // Wait for all queued CL requests to finish before reinitialising it. - arm_compute::CLScheduler::get().sync(); - } - - try - { - arm_compute::CLKernelLibrary::get().clear_programs_cache(); - // Initialise the scheduler with a dummy context to release the LLVM data (which only happens when there are no - // context references); it is initialised again, with a proper context, later. - arm_compute::CLScheduler::get().init(context, commandQueue, device); - arm_compute::CLKernelLibrary::get().init(".", context, device); - - { - // - // Here we replace the context with a new one in which - // the memory leak checks show it as an extra allocation but - // because of the scope of the leak checks, it doesn't count - // the disposal of the original object. On the other hand it - // does count the creation of this context which it flags - // as a memory leak. By adding the following line we prevent - // this from happening. - // - ARMNN_DISABLE_LEAK_CHECKING_IN_SCOPE(); - context = cl::Context(device); - } - - // NOTE: In this specific case profiling has to be enabled on the command queue - // in order for the CLTuner to work. - bool profilingNeededForClTuner = updateTunedParameters && m_Tuner && - m_Tuner->tune_new_kernels(); - - if (m_ProfilingEnabled || profilingNeededForClTuner) - { - // Create a new queue with profiling enabled. - commandQueue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE); - } - else - { - // Use default queue. - commandQueue = cl::CommandQueue(context, device); - } - } - catch (const cl::Error& clError) - { - throw ClRuntimeUnavailableException(boost::str(boost::format( - "Could not initialize the CL runtime. Error description: %1%. CL error code: %2%" - ) % clError.what() % clError.err())); - } - - // Note: the first argument (the path to the CL source code) will be ignored, as the kernels are embedded in the Compute Library.
- arm_compute::CLKernelLibrary::get().init(".", context, device); - arm_compute::CLScheduler::get().init(context, commandQueue, device, m_Tuner); -} - -void ClContextControl::ClearClCache() -{ - DoLoadOpenClRuntime(true); -} - -armnn::IGpuAccTunedParameters* IGpuAccTunedParameters::CreateRaw(armnn::IGpuAccTunedParameters::Mode mode, - armnn::IGpuAccTunedParameters::TuningLevel tuningLevel) -{ - return new ClTunedParameters(mode, tuningLevel); -} - -armnn::IGpuAccTunedParametersPtr IGpuAccTunedParameters::Create(armnn::IGpuAccTunedParameters::Mode mode, - armnn::IGpuAccTunedParameters::TuningLevel tuningLevel) -{ - return IGpuAccTunedParametersPtr(CreateRaw(mode, tuningLevel), &IGpuAccTunedParameters::Destroy); -} - -void IGpuAccTunedParameters::Destroy(IGpuAccTunedParameters* params) -{ - delete params; -} - -ClTunedParameters::ClTunedParameters(armnn::IGpuAccTunedParameters::Mode mode, - armnn::IGpuAccTunedParameters::TuningLevel tuningLevel) - : m_Mode(mode) - , m_TuningLevel(tuningLevel) - , m_Tuner(mode == ClTunedParameters::Mode::UpdateTunedParameters) -{ -} - -void ClTunedParameters::Load(const char* filename) -{ - try - { - m_Tuner.load_from_file(filename); - } - catch (const std::exception& e) - { - throw armnn::Exception(std::string("Failed to load tuned parameters file '") + filename + "': " + - e.what()); - } -} - -void ClTunedParameters::Save(const char* filename) const -{ - try - { - m_Tuner.save_to_file(filename); - } - catch (const std::exception& e) - { - throw armnn::Exception(std::string("Failed to save tuned parameters file to '") + filename + "': " + - e.what()); - } -} - -} // namespace armnn diff --git a/src/backends/cl/ClContextControl.hpp b/src/backends/cl/ClContextControl.hpp deleted file mode 100644 index fd27ced1f9..0000000000 --- a/src/backends/cl/ClContextControl.hpp +++ /dev/null @@ -1,56 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include "armnn/IRuntime.hpp" - -#include <arm_compute/runtime/CL/CLTuner.h> - -namespace armnn -{ - -// ARM Compute OpenCL context control. -class ClContextControl -{ -public: - - ClContextControl(arm_compute::CLTuner* = nullptr, - bool profilingEnabled = false); - - virtual ~ClContextControl(); - - void LoadOpenClRuntime(); - - // Users should call this (after freeing all of the cl::Context objects they use) - // to release the cached memory used by the compute library. - void UnloadOpenClRuntime(); - - // Clear the CL cache, without losing the tuned parameter settings. - void ClearClCache(); - -private: - - void DoLoadOpenClRuntime(bool updateTunedParameters); - - arm_compute::CLTuner* m_Tuner; - - bool m_ProfilingEnabled; -}; - -class ClTunedParameters : public IGpuAccTunedParameters -{ -public: - ClTunedParameters(armnn::IGpuAccTunedParameters::Mode mode, armnn::IGpuAccTunedParameters::TuningLevel tuningLevel); - - virtual void Load(const char* filename); - virtual void Save(const char* filename) const; - - Mode m_Mode; - TuningLevel m_TuningLevel; - - arm_compute::CLTuner m_Tuner; -}; - -} // namespace armnn diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp deleted file mode 100644 index e8548e4b5a..0000000000 --- a/src/backends/cl/ClLayerSupport.cpp +++ /dev/null @@ -1,822 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClLayerSupport.hpp" -#include "ClBackendId.hpp" - -#include <armnn/Descriptors.hpp> -#include <armnn/BackendRegistry.hpp> - -#include <InternalTypes.hpp> -#include <LayerSupportCommon.hpp> - -#include <boost/core/ignore_unused.hpp> - -#if defined(ARMCOMPUTECL_ENABLED) -#include <aclCommon/ArmComputeUtils.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> -#include "workloads/ClAbsWorkload.hpp" -#include "workloads/ClAdditionWorkload.hpp" -#include "workloads/ClActivationWorkload.hpp" -#include "workloads/ClArgMinMaxWorkload.hpp" -#include "workloads/ClBatchNormalizationFloatWorkload.hpp" -#include "workloads/ClBatchToSpaceNdWorkload.hpp" -#include "workloads/ClConvertFp16ToFp32Workload.hpp" -#include "workloads/ClConvertFp32ToFp16Workload.hpp" -#include "workloads/ClConvolution2dWorkload.hpp" -#include "workloads/ClDepthToSpaceWorkload.hpp" -#include "workloads/ClDepthwiseConvolutionWorkload.hpp" -#include "workloads/ClDequantizeWorkload.hpp" -#include "workloads/ClDivisionFloatWorkload.hpp" -#include "workloads/ClFloorFloatWorkload.hpp" -#include "workloads/ClFullyConnectedWorkload.hpp" -#include "workloads/ClGreaterWorkload.hpp" -#include "workloads/ClInstanceNormalizationWorkload.hpp" -#include "workloads/ClL2NormalizationFloatWorkload.hpp" -#include "workloads/ClLstmFloatWorkload.hpp" -#include "workloads/ClMaximumWorkload.hpp" -#include "workloads/ClMeanWorkload.hpp" -#include "workloads/ClConcatWorkload.hpp" -#include "workloads/ClMinimumWorkload.hpp" -#include "workloads/ClMultiplicationWorkload.hpp" -#include "workloads/ClNormalizationFloatWorkload.hpp" -#include "workloads/ClPadWorkload.hpp" -#include "workloads/ClPermuteWorkload.hpp" -#include "workloads/ClPooling2dWorkload.hpp" -#include "workloads/ClPreluWorkload.hpp" -#include "workloads/ClReshapeWorkload.hpp" -#include "workloads/ClResizeWorkload.hpp" -#include "workloads/ClRsqrtWorkload.hpp" -#include "workloads/ClQuantizedLstmWorkload.hpp" -#include "workloads/ClQuantizeWorkload.hpp" -#include "workloads/ClSliceWorkload.hpp" -#include "workloads/ClSoftmaxBaseWorkload.hpp" -#include "workloads/ClSpaceToBatchNdWorkload.hpp" -#include "workloads/ClSpaceToDepthWorkload.hpp" -#include "workloads/ClSplitterWorkload.hpp" -#include "workloads/ClStackWorkload.hpp" -#include "workloads/ClStridedSliceWorkload.hpp" -#include "workloads/ClSubtractionWorkload.hpp" -#include "workloads/ClTransposeConvolution2dWorkload.hpp" -#endif - -using namespace boost; - -namespace armnn -{ - -namespace -{ - -template<unsigned int FilterSize> -bool IsMatchingSize2d(const TensorInfo& weightInfo) -{ - // Width & Height must match. - return (weightInfo.GetShape()[3] == FilterSize) && (weightInfo.GetShape()[2] == FilterSize); -} - -template<uint32_t ValidStride> -bool IsMatchingStride(uint32_t actualStride) -{ - return ValidStride == actualStride; -} - -template<uint32_t FirstStride, uint32_t SecondStride, uint32_t... ValidStrides> -bool IsMatchingStride(uint32_t actualStride) -{ - return IsMatchingStride<FirstStride>(actualStride) || IsMatchingStride<SecondStride, ValidStrides...>(actualStride); -} - -template<typename ... Args> -bool IsClBackendSupported(Optional<std::string&> reasonIfUnsupported, Args... 
args) -{ - boost::ignore_unused(reasonIfUnsupported, (args)...); -#if defined(ARMCOMPUTECL_ENABLED) - return true; -#else - if (reasonIfUnsupported) - { - reasonIfUnsupported.value() = "The armnn library has been built without CL support"; - } - return false; -#endif -} - -#if defined(ARMCOMPUTECL_ENABLED) -#define FORWARD_CL_LAYER_SUPPORT_FUNC(expr) (expr) -#else -#define FORWARD_CL_LAYER_SUPPORT_FUNC(expr) IsClBackendSupported(reasonIfUnsupported) -#endif - -#if defined(ARMCOMPUTECL_ENABLED) -template<class FuncType, class... Args> -inline bool IsWorkloadSupported(FuncType&& func, Optional<std::string&> reasonIfUnsupported, Args&&... args) -{ - arm_compute::Status aclStatus = func(std::forward<Args>(args)...); - const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); - if (!supported && reasonIfUnsupported) - { - reasonIfUnsupported.value() = aclStatus.error_description(); - } - return supported; -} - -#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \ - return IsWorkloadSupported(func, reasonIfUnsupported, __VA_ARGS__); -#else -#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \ - return IsClBackendSupported(reasonIfUnsupported, __VA_ARGS__); -#endif - -template<typename FloatFunc, typename Uint8Func, typename ... Params> -bool IsSupportedForDataTypeCl(Optional<std::string&> reasonIfUnsupported, - DataType dataType, - FloatFunc floatFuncPtr, - Uint8Func uint8FuncPtr, - Params&&... params) -{ - return IsClBackendSupported(reasonIfUnsupported) && - IsSupportedForDataTypeGeneric(reasonIfUnsupported, - dataType, - floatFuncPtr, - floatFuncPtr, - uint8FuncPtr, - &FalseFunc<>, - &FalseFunc<>, - std::forward<Params>(params)...); -} -} // anonymous namespace - -bool ClLayerSupport::IsAbsSupported(const TensorInfo& input, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported) const -{ - ElementwiseUnaryDescriptor descriptor(UnaryOperation::Abs); - return IsElementwiseUnarySupported(input, output, descriptor, reasonIfUnsupported); -} - -bool ClLayerSupport::IsActivationSupported(const TensorInfo& input, - const TensorInfo& output, - const ActivationDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClActivationWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor); -} - -bool ClLayerSupport::IsAdditionSupported(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClAdditionValidate, - reasonIfUnsupported, - input0, - input1, - output); -} - -bool ClLayerSupport::IsArgMinMaxSupported(const TensorInfo& input, - const TensorInfo& output, - const ArgMinMaxDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - - FORWARD_WORKLOAD_VALIDATE_FUNC(ClArgMinMaxWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor); -} - -bool ClLayerSupport::IsBatchNormalizationSupported(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& mean, - const TensorInfo& var, - const TensorInfo& beta, - const TensorInfo& gamma, - const BatchNormalizationDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClBatchNormalizationValidate, - reasonIfUnsupported, - input, - output, - mean, - var, - beta, - gamma, - descriptor); -} - -bool ClLayerSupport::IsBatchToSpaceNdSupported(const TensorInfo& input, - const TensorInfo& output, - 
const BatchToSpaceNdDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClBatchToSpaceNdWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor); -} - -bool ClLayerSupport::IsComparisonSupported(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - const ComparisonDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - if (descriptor.m_Operation == ComparisonOperation::Greater) - { - FORWARD_WORKLOAD_VALIDATE_FUNC(ClGreaterWorkloadValidate, - reasonIfUnsupported, - input0, - input1, - output); - } - - return false; -} - -bool ClLayerSupport::IsConcatSupported(const std::vector<const TensorInfo*> inputs, - const TensorInfo& output, - const ConcatDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - if (descriptor.GetNumDimensions() <= descriptor.GetConcatAxis()) - { - SetValueChecked(reasonIfUnsupported, "Cl Concat: Concat axis > Number of dimensions."); - return false; - } - - unsigned int concatInnerAxis = (descriptor.GetNumDimensions() - descriptor.GetConcatAxis()) - 1; - if(concatInnerAxis < 3) // Width, height, or channels - { - FORWARD_WORKLOAD_VALIDATE_FUNC(ClConcatWorkloadValidate, - reasonIfUnsupported, - inputs, - output, - descriptor); - } - else if (concatInnerAxis == 3) - { - // We rely on the sub-tensor optimization to handle the batch dimension for 4D tensors. If we can't use - // sub-tensors for this then we can't support it. Here is where we check that the sub-tensors will work. - for (auto& input : inputs) - { - if (input && !output.IsTypeSpaceMatch(*input)) // Cannot use sub-tensors if the types are not same space - { - SetValueChecked(reasonIfUnsupported, "Cl Concat: Types and quantization parameters must match."); - return false; - } - } - return true; // Sub-tensors support concat along batch - } - else // > 4 dimensions not supported. 
- { - SetValueChecked(reasonIfUnsupported, "Cl Concat: Maximum of 4 dimensions supported."); - return false; - } -} - -bool ClLayerSupport::IsConstantSupported(const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported) const -{ - return IsSupportedForDataTypeCl(reasonIfUnsupported, - output.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool ClLayerSupport::IsConvertFp16ToFp32Supported(const TensorInfo& input, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClConvertFp16ToFp32WorkloadValidate, - reasonIfUnsupported, - input, - output); -} - -bool ClLayerSupport::IsConvertFp32ToFp16Supported(const TensorInfo& input, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClConvertFp32ToFp16WorkloadValidate, - reasonIfUnsupported, - input, - output); -} - -bool ClLayerSupport::IsConvolution2dSupported(const TensorInfo& input, - const TensorInfo& output, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional<TensorInfo>& biases, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClConvolution2dWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor, - weights, - biases); -} - -bool ClLayerSupport::IsDequantizeSupported(const TensorInfo& input, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClDequantizeWorkloadValidate, - reasonIfUnsupported, - input, - output); -} - -bool ClLayerSupport::IsDepthToSpaceSupported(const TensorInfo& input, - const TensorInfo& output, - const DepthToSpaceDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClDepthToSpaceWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor); -} - -bool ClLayerSupport::IsDepthwiseConvolutionSupported(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional<TensorInfo>& biases, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClDepthwiseConvolutionWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor, - weights, - biases); -} - -bool ClLayerSupport::IsDilatedDepthwiseConvolutionSupported(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional<TensorInfo>& biases, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClDepthwiseConvolutionWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor, - weights, - biases); -} - - -bool ClLayerSupport::IsDivisionSupported(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClDivisionWorkloadValidate, - reasonIfUnsupported, - input0, - input1, - output); -} - -bool ClLayerSupport::IsElementwiseUnarySupported(const TensorInfo& input, - const TensorInfo& output, - const ElementwiseUnaryDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - if (descriptor.m_Operation == UnaryOperation::Abs) - { - FORWARD_WORKLOAD_VALIDATE_FUNC(ClAbsWorkloadValidate, - reasonIfUnsupported, - input, - output); - } - else if (descriptor.m_Operation == UnaryOperation::Rsqrt) - { - 
FORWARD_WORKLOAD_VALIDATE_FUNC(ClRsqrtWorkloadValidate, - reasonIfUnsupported, - input, - output); - } - - return false; -} - -bool ClLayerSupport::IsFloorSupported(const TensorInfo& input, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClFloorWorkloadValidate, - reasonIfUnsupported, - input, - output); -} - -bool ClLayerSupport::IsFullyConnectedSupported(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& weights, - const TensorInfo& biases, - const FullyConnectedDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClFullyConnectedWorkloadValidate, - reasonIfUnsupported, - input, - output, - weights, - biases, - descriptor); -} - -bool ClLayerSupport::IsGreaterSupported(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported) const -{ - ComparisonDescriptor descriptor(ComparisonOperation::Greater); - return IsComparisonSupported(input0, input1, output, descriptor, reasonIfUnsupported); -} - -bool ClLayerSupport::IsInputSupported(const TensorInfo& input, - Optional<std::string&> reasonIfUnsupported) const -{ - return IsClBackendSupported(reasonIfUnsupported, input); -} - -bool ClLayerSupport::IsInstanceNormalizationSupported(const TensorInfo& input, - const TensorInfo& output, - const InstanceNormalizationDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClInstanceNormalizationWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor); -} - -bool ClLayerSupport::IsL2NormalizationSupported(const TensorInfo& input, - const TensorInfo& output, - const L2NormalizationDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClL2NormalizationWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor); -} - -bool ClLayerSupport::IsLstmSupported(const TensorInfo& input, - const TensorInfo& outputStateIn, - const TensorInfo& cellStateIn, - const TensorInfo& scratchBuffer, - const TensorInfo& outputStateOut, - const TensorInfo& cellStateOut, - const TensorInfo& output, - const LstmDescriptor& descriptor, - const LstmInputParamsInfo& paramsInfo, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClLstmFloatWorkloadValidate, - reasonIfUnsupported, - input, - outputStateIn, - cellStateIn, - scratchBuffer, - outputStateOut, - cellStateOut, - output, - descriptor, - paramsInfo); -} - -bool ClLayerSupport::IsMaximumSupported(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClMaximumWorkloadValidate, - reasonIfUnsupported, - input0, - input1, - output); -} - -bool ClLayerSupport::IsMeanSupported(const TensorInfo& input, - const TensorInfo& output, - const MeanDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClMeanValidate, - reasonIfUnsupported, - input, - output, - descriptor); -} - -bool ClLayerSupport::IsMergerSupported(const std::vector<const TensorInfo*> inputs, - const TensorInfo& output, - const MergerDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - return IsConcatSupported(inputs, output, descriptor, reasonIfUnsupported); -} - -bool ClLayerSupport::IsMinimumSupported(const TensorInfo& 
input0, - const TensorInfo& input1, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClMinimumWorkloadValidate, - reasonIfUnsupported, - input0, - input1, - output); -} - -bool ClLayerSupport::IsMultiplicationSupported(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClMultiplicationWorkloadValidate, - reasonIfUnsupported, - input0, - input1, - output); -} - -bool ClLayerSupport::IsNormalizationSupported(const TensorInfo& input, - const TensorInfo& output, - const NormalizationDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClNormalizationWorkloadValidate, reasonIfUnsupported, input, output, descriptor); -} - -bool ClLayerSupport::IsOutputSupported(const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported) const -{ - return IsClBackendSupported(reasonIfUnsupported, output); -} - -bool ClLayerSupport::IsPadSupported(const TensorInfo& input, - const TensorInfo& output, - const PadDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClPadValidate, - reasonIfUnsupported, - input, - output, - descriptor); -} - -bool ClLayerSupport::IsPermuteSupported(const TensorInfo& input, - const TensorInfo& output, - const PermuteDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClPermuteWorkloadValidate, reasonIfUnsupported, input, output, descriptor); -} - -bool ClLayerSupport::IsPooling2dSupported(const TensorInfo& input, - const TensorInfo& output, - const Pooling2dDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClPooling2dWorkloadValidate, reasonIfUnsupported, input, output, descriptor); -} - -bool ClLayerSupport::IsPreluSupported(const armnn::TensorInfo &input, - const armnn::TensorInfo &alpha, - const armnn::TensorInfo &output, - armnn::Optional<std::string &> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClPreluWorkloadValidate, reasonIfUnsupported, input, alpha, output); -} - -bool ClLayerSupport::IsQuantizedLstmSupported(const TensorInfo& input, - const TensorInfo& previousCellStateIn, - const TensorInfo& previousOutputIn, - const TensorInfo& cellStateOut, - const TensorInfo& output, - const QuantizedLstmInputParamsInfo& paramsInfo, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClQuantizedLstmWorkloadValidate, - reasonIfUnsupported, - input, - previousCellStateIn, - previousOutputIn, - cellStateOut, - output, - paramsInfo); -} - -bool ClLayerSupport::IsQuantizeSupported(const TensorInfo& input, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClQuantizeWorkloadValidate, - reasonIfUnsupported, - input, - output); -} - -bool ClLayerSupport::IsReshapeSupported(const TensorInfo& input, - const TensorInfo& output, - const ReshapeDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - ignore_unused(descriptor); - FORWARD_WORKLOAD_VALIDATE_FUNC(ClReshapeWorkloadValidate, reasonIfUnsupported, input, output); -} - -bool ClLayerSupport::IsResizeSupported(const TensorInfo& input, - const TensorInfo& output, - const ResizeDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - 
FORWARD_WORKLOAD_VALIDATE_FUNC(ClResizeWorkloadValidate, reasonIfUnsupported, input, output, descriptor); -} - -bool ClLayerSupport::IsResizeBilinearSupported(const TensorInfo& input, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported) const -{ - ResizeDescriptor descriptor; - descriptor.m_Method = ResizeMethod::Bilinear; - descriptor.m_DataLayout = DataLayout::NCHW; - - const TensorShape& outputShape = output.GetShape(); - descriptor.m_TargetHeight = outputShape[2]; - descriptor.m_TargetWidth = outputShape[3]; - - return IsResizeSupported(input, output, descriptor, reasonIfUnsupported); -} - -bool ClLayerSupport::IsRsqrtSupported(const TensorInfo& input, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported) const -{ - ElementwiseUnaryDescriptor descriptor(UnaryOperation::Rsqrt); - return IsElementwiseUnarySupported(input, output, descriptor, reasonIfUnsupported); -} - -bool ClLayerSupport::IsSliceSupported(const TensorInfo& input, - const TensorInfo& output, - const SliceDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClSliceWorkloadValidate, reasonIfUnsupported, input, output, descriptor); -} - -bool ClLayerSupport::IsSoftmaxSupported(const TensorInfo& input, - const TensorInfo& output, - const SoftmaxDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClSoftmaxWorkloadValidate, reasonIfUnsupported, input, output, descriptor); -} - -bool ClLayerSupport::IsSpaceToBatchNdSupported(const TensorInfo& input, - const TensorInfo& output, - const SpaceToBatchNdDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClSpaceToBatchNdWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor); -} - -bool ClLayerSupport::IsSpaceToDepthSupported(const TensorInfo& input, - const TensorInfo& output, - const SpaceToDepthDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClSpaceToDepthWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor); -} - -bool ClLayerSupport::IsSplitterSupported(const TensorInfo& input, - const ViewsDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - ignore_unused(descriptor); - return IsSupportedForDataTypeCl(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool ClLayerSupport::IsSplitterSupported(const TensorInfo& input, - const std::vector<std::reference_wrapper<TensorInfo>>& outputs, - const ViewsDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ -#if defined(ARMCOMPUTECL_ENABLED) - // Split along the last dimension, cannot use sub-tensors - // as width and height of the sub-tensors do not match - // the width and height of the parent tensor - // in case of input with more than 2D. 
- std::set<unsigned int> splitAxis = ComputeSplitAxis(descriptor, input.GetShape()); - if (descriptor.GetNumDimensions() > 2 && splitAxis.size() == 1 && - *splitAxis.begin() == descriptor.GetNumDimensions() - 1 ) - { - FORWARD_WORKLOAD_VALIDATE_FUNC(ClSplitterWorkloadValidate, - reasonIfUnsupported, - input, - outputs, - *splitAxis.begin()); - } -#endif - boost::ignore_unused(descriptor); - for (auto output : outputs) - { - if (!input.IsTypeSpaceMatch(output)) // Cannot use sub-tensors if the types are not same space - { - SetValueChecked(reasonIfUnsupported, "Cl Splitter: Types and quantization parameters must match."); - return false; - } - } - return true; -} - -bool ClLayerSupport::IsStackSupported(const std::vector<const TensorInfo*>& inputs, - const TensorInfo& output, - const StackDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClStackWorkloadValidate, - reasonIfUnsupported, - inputs, - output, - descriptor); -} - -bool ClLayerSupport::IsStridedSliceSupported(const TensorInfo& input, - const TensorInfo& output, - const StridedSliceDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClStridedSliceWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor); -} - -bool ClLayerSupport::IsSubtractionSupported(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClSubtractionValidate, - reasonIfUnsupported, - input0, - input1, - output); -} - -bool ClLayerSupport::IsTransposeConvolution2dSupported(const TensorInfo& input, - const TensorInfo& output, - const TransposeConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional<TensorInfo>& biases, - Optional<std::string&> reasonIfUnsupported) const -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClTransposeConvolution2dWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor, - weights, - biases); -} - -} // namespace armnn diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp deleted file mode 100644 index 819d086cb4..0000000000 --- a/src/backends/cl/ClLayerSupport.hpp +++ /dev/null @@ -1,291 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#pragma once - -#include <backendsCommon/LayerSupportBase.hpp> - -namespace armnn -{ - -class ClLayerSupport : public LayerSupportBase -{ -public: - ARMNN_DEPRECATED_MSG("Use IsElementwiseUnarySupported instead") - bool IsAbsSupported(const TensorInfo& input, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsActivationSupported(const TensorInfo& input, - const TensorInfo& output, - const ActivationDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsAdditionSupported(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsArgMinMaxSupported(const TensorInfo& input, - const TensorInfo& output, - const ArgMinMaxDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsBatchNormalizationSupported(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& mean, - const TensorInfo& var, - const TensorInfo& beta, - const TensorInfo& gamma, - const BatchNormalizationDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsBatchToSpaceNdSupported(const TensorInfo& input, - const TensorInfo& output, - const BatchToSpaceNdDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsComparisonSupported(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - const ComparisonDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsConcatSupported(const std::vector<const TensorInfo*> inputs, - const TensorInfo& output, - const ConcatDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsConstantSupported(const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsConvertFp16ToFp32Supported(const TensorInfo& input, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsConvertFp32ToFp16Supported(const TensorInfo& input, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsConvolution2dSupported(const TensorInfo& input, - const TensorInfo& output, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional<TensorInfo>& biases, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsDequantizeSupported(const TensorInfo& input, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsDepthToSpaceSupported(const TensorInfo& input, - const TensorInfo& output, - const DepthToSpaceDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsDepthwiseConvolutionSupported(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional<TensorInfo>& biases, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsDilatedDepthwiseConvolutionSupported(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional<TensorInfo>& biases, - Optional<std::string&> reason = EmptyOptional()) const override; - - bool IsDivisionSupported(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsElementwiseUnarySupported(const TensorInfo& input, - const TensorInfo& output, - const ElementwiseUnaryDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsFloorSupported(const TensorInfo& input, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsFullyConnectedSupported(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& weights, - const TensorInfo& biases, - const FullyConnectedDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - ARMNN_DEPRECATED_MSG("Use IsComparisonSupported instead") - bool IsGreaterSupported(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported) const override; - - bool IsInputSupported(const TensorInfo& input, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsInstanceNormalizationSupported(const TensorInfo& input, - const TensorInfo& output, - const InstanceNormalizationDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsL2NormalizationSupported(const TensorInfo& input, - const TensorInfo& output, - const L2NormalizationDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsLstmSupported(const TensorInfo& input, - const TensorInfo& outputStateIn, - const TensorInfo& cellStateIn, - const TensorInfo& scratchBuffer, - const TensorInfo& outputStateOut, - const TensorInfo& cellStateOut, - const TensorInfo& output, - const LstmDescriptor& descriptor, - const LstmInputParamsInfo& paramsInfo, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsMaximumSupported(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsMeanSupported(const TensorInfo& input, - const TensorInfo& output, - const MeanDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - ARMNN_DEPRECATED_MSG("Use IsConcatSupported instead") - bool IsMergerSupported(const std::vector<const TensorInfo*> inputs, - const TensorInfo& output, - const MergerDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsMinimumSupported(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsMultiplicationSupported(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsNormalizationSupported(const TensorInfo& input, - const TensorInfo& output, - const NormalizationDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsOutputSupported(const TensorInfo& output, -
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsPadSupported(const TensorInfo& input, - const TensorInfo& output, - const PadDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsPermuteSupported(const TensorInfo& input, - const TensorInfo& output, - const PermuteDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsPooling2dSupported(const TensorInfo& input, - const TensorInfo& output, - const Pooling2dDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsPreluSupported(const TensorInfo& input, - const TensorInfo& alpha, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsQuantizedLstmSupported(const TensorInfo& input, - const TensorInfo& previousCellStateIn, - const TensorInfo& previousOutputIn, - const TensorInfo& cellStateOut, - const TensorInfo& output, - const QuantizedLstmInputParamsInfo& paramsInfo, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsQuantizeSupported(const TensorInfo& input, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsReshapeSupported(const TensorInfo& input, - const TensorInfo& output, - const ReshapeDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsResizeSupported(const TensorInfo& input, - const TensorInfo& output, - const ResizeDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - ARMNN_DEPRECATED_MSG("Use IsResizeSupported instead") - bool IsResizeBilinearSupported(const TensorInfo& input, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - ARMNN_DEPRECATED_MSG("Use IsElementwiseUnarySupported instead") - bool IsRsqrtSupported(const TensorInfo& input, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsSliceSupported(const TensorInfo& input, - const TensorInfo& output, - const SliceDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsSoftmaxSupported(const TensorInfo& input, - const TensorInfo& output, - const SoftmaxDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsSpaceToBatchNdSupported(const TensorInfo& input, - const TensorInfo& output, - const SpaceToBatchNdDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsSpaceToDepthSupported(const TensorInfo& input, - const TensorInfo& output, - const SpaceToDepthDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - ARMNN_DEPRECATED_MSG("Use IsSplitterSupported with outputs instead") - bool IsSplitterSupported(const TensorInfo& input, - const ViewsDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsSplitterSupported(const TensorInfo& input, - const std::vector<std::reference_wrapper<TensorInfo>>& outputs, - const ViewsDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsStackSupported(const std::vector<const 
TensorInfo*>& inputs, - const TensorInfo& output, - const StackDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsStridedSliceSupported(const TensorInfo& input, - const TensorInfo& output, - const StridedSliceDescriptor& descriptor, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsSubtractionSupported(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; - - bool IsTransposeConvolution2dSupported(const TensorInfo& input, - const TensorInfo& output, - const TransposeConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional<TensorInfo>& biases, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; -}; - -} // namespace armnn diff --git a/src/backends/cl/ClRegistryInitializer.cpp b/src/backends/cl/ClRegistryInitializer.cpp deleted file mode 100644 index 8decd6f689..0000000000 --- a/src/backends/cl/ClRegistryInitializer.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClBackend.hpp" - -#include <armnn/BackendRegistry.hpp> - -namespace -{ - -using namespace armnn; - -static BackendRegistry::StaticRegistryInitializer g_RegisterHelper -{ - BackendRegistryInstance(), - ClBackend::GetIdStatic(), - []() - { - return IBackendInternalUniquePtr(new ClBackend); - } -}; - -} // Anonymous namespace diff --git a/src/backends/cl/ClTensorHandle.hpp b/src/backends/cl/ClTensorHandle.hpp deleted file mode 100644 index 1830d186b6..0000000000 --- a/src/backends/cl/ClTensorHandle.hpp +++ /dev/null @@ -1,304 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#pragma once - -#include <aclCommon/ArmComputeTensorHandle.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include <Half.hpp> - -#include <arm_compute/runtime/CL/CLTensor.h> -#include <arm_compute/runtime/CL/CLSubTensor.h> -#include <arm_compute/runtime/IMemoryGroup.h> -#include <arm_compute/runtime/MemoryGroup.h> -#include <arm_compute/core/TensorShape.h> -#include <arm_compute/core/Coordinates.h> - -#include <boost/polymorphic_pointer_cast.hpp> - -namespace armnn -{ - - -class IClTensorHandle : public IAclTensorHandle -{ -public: - virtual arm_compute::ICLTensor& GetTensor() = 0; - virtual arm_compute::ICLTensor const& GetTensor() const = 0; - virtual arm_compute::DataType GetDataType() const = 0; - virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) = 0; -}; - -class ClTensorHandle : public IClTensorHandle -{ -public: - ClTensorHandle(const TensorInfo& tensorInfo) - { - armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo); - } - - ClTensorHandle(const TensorInfo& tensorInfo, DataLayout dataLayout) - { - armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout); - } - - arm_compute::CLTensor& GetTensor() override { return m_Tensor; } - arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; } - virtual void Allocate() override {armnn::armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_Tensor);} - - virtual void Manage() override - { - assert(m_MemoryGroup != nullptr); - m_MemoryGroup->manage(&m_Tensor); - } - - virtual const void* Map(bool blocking = true) const override - { - const_cast<arm_compute::CLTensor*>(&m_Tensor)->map(blocking); - return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); - } - - virtual void Unmap() const override { const_cast<arm_compute::CLTensor*>(&m_Tensor)->unmap(); } - - virtual ITensorHandle* GetParent() const override { return nullptr; } - - virtual arm_compute::DataType GetDataType() const override - { - return m_Tensor.info()->data_type(); - } - - virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override - { - m_MemoryGroup = boost::polymorphic_pointer_downcast<arm_compute::MemoryGroup>(memoryGroup); - } - - TensorShape GetStrides() const override - { - return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); - } - - TensorShape GetShape() const override - { - return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); - } - -private: - // Only used for testing - void CopyOutTo(void* memory) const override - { - const_cast<armnn::ClTensorHandle*>(this)->Map(true); - switch(this->GetDataType()) - { - case arm_compute::DataType::F32: - armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), - static_cast<float*>(memory)); - break; - case arm_compute::DataType::U8: - case arm_compute::DataType::QASYMM8: - armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), - static_cast<uint8_t*>(memory)); - break; - case arm_compute::DataType::QSYMM8_PER_CHANNEL: - case arm_compute::DataType::QASYMM8_SIGNED: - armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), - static_cast<int8_t*>(memory)); - break; - case arm_compute::DataType::F16: - armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), - static_cast<armnn::Half*>(memory)); - break; - case arm_compute::DataType::S16: - case arm_compute::DataType::QSYMM16: - 
armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), - static_cast<int16_t*>(memory)); - break; - case arm_compute::DataType::S32: - armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), - static_cast<int32_t*>(memory)); - break; - default: - { - throw armnn::UnimplementedException(); - } - } - const_cast<armnn::ClTensorHandle*>(this)->Unmap(); - } - - // Only used for testing - void CopyInFrom(const void* memory) override - { - this->Map(true); - switch(this->GetDataType()) - { - case arm_compute::DataType::F32: - armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory), - this->GetTensor()); - break; - case arm_compute::DataType::U8: - case arm_compute::DataType::QASYMM8: - armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory), - this->GetTensor()); - break; - case arm_compute::DataType::F16: - armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory), - this->GetTensor()); - break; - case arm_compute::DataType::S16: - case arm_compute::DataType::QSYMM8_PER_CHANNEL: - case arm_compute::DataType::QASYMM8_SIGNED: - armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory), - this->GetTensor()); - break; - case arm_compute::DataType::QSYMM16: - armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory), - this->GetTensor()); - break; - case arm_compute::DataType::S32: - armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory), - this->GetTensor()); - break; - default: - { - throw armnn::UnimplementedException(); - } - } - this->Unmap(); - } - - arm_compute::CLTensor m_Tensor; - std::shared_ptr<arm_compute::MemoryGroup> m_MemoryGroup; -}; - -class ClSubTensorHandle : public IClTensorHandle -{ -public: - ClSubTensorHandle(IClTensorHandle* parent, - const arm_compute::TensorShape& shape, - const arm_compute::Coordinates& coords) - : m_Tensor(&parent->GetTensor(), shape, coords) - { - parentHandle = parent; - } - - arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; } - arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; } - - virtual void Allocate() override {} - virtual void Manage() override {} - - virtual const void* Map(bool blocking = true) const override - { - const_cast<arm_compute::CLSubTensor*>(&m_Tensor)->map(blocking); - return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); - } - virtual void Unmap() const override { const_cast<arm_compute::CLSubTensor*>(&m_Tensor)->unmap(); } - - virtual ITensorHandle* GetParent() const override { return parentHandle; } - - virtual arm_compute::DataType GetDataType() const override - { - return m_Tensor.info()->data_type(); - } - - virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>&) override {} - - TensorShape GetStrides() const override - { - return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); - } - - TensorShape GetShape() const override - { - return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); - } - -private: - // Only used for testing - void CopyOutTo(void* memory) const override - { - const_cast<ClSubTensorHandle*>(this)->Map(true); - switch(this->GetDataType()) - { - case arm_compute::DataType::F32: - armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), - static_cast<float*>(memory)); - break; - case arm_compute::DataType::U8: - case arm_compute::DataType::QASYMM8: - 
armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), - static_cast<uint8_t*>(memory)); - break; - case arm_compute::DataType::F16: - armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), - static_cast<armnn::Half*>(memory)); - break; - case arm_compute::DataType::QSYMM8_PER_CHANNEL: - case arm_compute::DataType::QASYMM8_SIGNED: - armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), - static_cast<int8_t*>(memory)); - break; - case arm_compute::DataType::S16: - case arm_compute::DataType::QSYMM16: - armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), - static_cast<int16_t*>(memory)); - break; - case arm_compute::DataType::S32: - armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), - static_cast<int32_t*>(memory)); - break; - default: - { - throw armnn::UnimplementedException(); - } - } - const_cast<ClSubTensorHandle*>(this)->Unmap(); - } - - // Only used for testing - void CopyInFrom(const void* memory) override - { - this->Map(true); - switch(this->GetDataType()) - { - case arm_compute::DataType::F32: - armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory), - this->GetTensor()); - break; - case arm_compute::DataType::U8: - case arm_compute::DataType::QASYMM8: - armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory), - this->GetTensor()); - break; - case arm_compute::DataType::F16: - armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory), - this->GetTensor()); - break; - case arm_compute::DataType::QSYMM8_PER_CHANNEL: - case arm_compute::DataType::QASYMM8_SIGNED: - armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory), - this->GetTensor()); - break; - case arm_compute::DataType::S16: - case arm_compute::DataType::QSYMM16: - armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory), - this->GetTensor()); - break; - case arm_compute::DataType::S32: - armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory), - this->GetTensor()); - break; - default: - { - throw armnn::UnimplementedException(); - } - } - this->Unmap(); - } - - mutable arm_compute::CLSubTensor m_Tensor; - ITensorHandle* parentHandle = nullptr; -}; - -} // namespace armnn diff --git a/src/backends/cl/ClTensorHandleFactory.cpp b/src/backends/cl/ClTensorHandleFactory.cpp deleted file mode 100644 index 9df3f1a4a6..0000000000 --- a/src/backends/cl/ClTensorHandleFactory.cpp +++ /dev/null @@ -1,108 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - - -#include "ClTensorHandleFactory.hpp" -#include "ClTensorHandle.hpp" - -#include <arm_compute/runtime/CL/CLTensor.h> -#include <arm_compute/core/Coordinates.h> -#include <arm_compute/runtime/CL/CLSubTensor.h> - -#include <boost/polymorphic_cast.hpp> - - -namespace armnn -{ - -using FactoryId = ITensorHandleFactory::FactoryId; - -std::unique_ptr<ITensorHandle> ClTensorHandleFactory::CreateSubTensorHandle(ITensorHandle& parent, - const TensorShape& subTensorShape, - const unsigned int* subTensorOrigin) const -{ - arm_compute::Coordinates coords; - arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape); - - coords.set_num_dimensions(subTensorShape.GetNumDimensions()); - for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); ++i) - { - // Arm compute indexes tensor coords in reverse order. 
- unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1; - coords.set(i, boost::numeric_cast<int>(subTensorOrigin[revertedIndex])); - } - - const arm_compute::TensorShape parentShape = armcomputetensorutils::BuildArmComputeTensorShape( - parent.GetShape()); - if (!::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, parentShape, coords, shape)) - { - return nullptr; - } - - return std::make_unique<ClSubTensorHandle>( - boost::polymorphic_downcast<IClTensorHandle *>(&parent), shape, coords); -} - -std::unique_ptr<ITensorHandle> ClTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const -{ - return ClTensorHandleFactory::CreateTensorHandle(tensorInfo, true); -} - -std::unique_ptr<ITensorHandle> ClTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout) const -{ - return ClTensorHandleFactory::CreateTensorHandle(tensorInfo, dataLayout, true); -} - -std::unique_ptr<ITensorHandle> ClTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo, - const bool IsMemoryManaged) const -{ - std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo); - if (IsMemoryManaged) - { - tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup()); - } - return tensorHandle; -} - -std::unique_ptr<ITensorHandle> ClTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout, - const bool IsMemoryManaged) const -{ - std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo, dataLayout); - if (IsMemoryManaged) - { - tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup()); - } - return tensorHandle; -} - -const FactoryId& ClTensorHandleFactory::GetIdStatic() -{ - static const FactoryId s_Id(ClTensorHandleFactoryId()); - return s_Id; -} - -const FactoryId& ClTensorHandleFactory::GetId() const -{ - return GetIdStatic(); -} - -bool ClTensorHandleFactory::SupportsSubTensors() const -{ - return true; -} - -MemorySourceFlags ClTensorHandleFactory::GetExportFlags() const -{ - return m_ExportFlags; -} - -MemorySourceFlags ClTensorHandleFactory::GetImportFlags() const -{ - return m_ImportFlags; -} - -} // namespace armnn
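For context on the coordinate reversal in CreateSubTensorHandle above: ArmNN orders tensor dimensions outermost-first (e.g. N, C, H, W), while the Compute Library's Coordinates index dimension 0 as the innermost, so the sub-tensor origin is copied in reverse. A minimal standalone sketch of that mapping, with a hypothetical origin and no ACL dependency:

    #include <array>
    #include <cstddef>
    #include <iostream>

    int main()
    {
        // Hypothetical sub-tensor origin in ArmNN dimension order (N, C, H, W).
        const std::array<unsigned int, 4> armnnOrigin = {0, 1, 2, 3};
        std::array<unsigned int, 4> aclCoords{};

        const std::size_t numDims = armnnOrigin.size();
        for (std::size_t i = 0; i < numDims; ++i)
        {
            // Same reversal as CreateSubTensorHandle: ACL dimension i maps to
            // ArmNN dimension (numDims - i - 1).
            aclCoords[i] = armnnOrigin[numDims - i - 1];
        }

        for (unsigned int c : aclCoords)
        {
            std::cout << c << ' ';   // prints "3 2 1 0": W, H, C, N
        }
        std::cout << '\n';
        return 0;
    }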
\ No newline at end of file diff --git a/src/backends/cl/ClTensorHandleFactory.hpp b/src/backends/cl/ClTensorHandleFactory.hpp deleted file mode 100644 index aab3faad0a..0000000000 --- a/src/backends/cl/ClTensorHandleFactory.hpp +++ /dev/null @@ -1,59 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include <armnn/backends/ITensorHandleFactory.hpp> -#include <aclCommon/BaseMemoryManager.hpp> -#include <armnn/backends/IMemoryManager.hpp> -#include <armnn/MemorySources.hpp> - -namespace armnn -{ - -constexpr const char* ClTensorHandleFactoryId() { return "Arm/Cl/TensorHandleFactory"; } - -class ClTensorHandleFactory : public ITensorHandleFactory { -public: - static const FactoryId m_Id; - - ClTensorHandleFactory(std::shared_ptr<ClMemoryManager> mgr) - : m_MemoryManager(mgr), - m_ImportFlags(static_cast<MemorySourceFlags>(MemorySource::Undefined)), - m_ExportFlags(static_cast<MemorySourceFlags>(MemorySource::Undefined)) - {} - - std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent, - const TensorShape& subTensorShape, - const unsigned int* subTensorOrigin) const override; - - std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override; - - std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout) const override; - - std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, - const bool IsMemoryManaged = true) const override; - - std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout, - const bool IsMemoryManaged = true) const override; - - static const FactoryId& GetIdStatic(); - - const FactoryId& GetId() const override; - - bool SupportsSubTensors() const override; - - MemorySourceFlags GetExportFlags() const override; - - MemorySourceFlags GetImportFlags() const override; - -private: - mutable std::shared_ptr<ClMemoryManager> m_MemoryManager; - MemorySourceFlags m_ImportFlags; - MemorySourceFlags m_ExportFlags; -}; - -} // namespace armnn
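Note that ClTensorHandleFactory attaches a new handle to the memory manager's inter-layer memory group only when IsMemoryManaged is true; otherwise the handle owns its allocation outright. A minimal sketch of that decision, with hypothetical stand-ins for the handle and memory-group types:

    #include <memory>

    // Hypothetical stand-ins for arm_compute::MemoryGroup and ClTensorHandle.
    struct MemoryGroup {};

    struct Handle
    {
        void SetMemoryGroup(const std::shared_ptr<MemoryGroup>& group) { m_Group = group; }
        std::shared_ptr<MemoryGroup> m_Group;
    };

    // Mirrors the factory's CreateTensorHandle overloads: the inter-layer
    // memory group is attached only when managed memory was requested.
    std::unique_ptr<Handle> CreateHandle(const std::shared_ptr<MemoryGroup>& interLayerGroup,
                                         bool isMemoryManaged)
    {
        auto handle = std::make_unique<Handle>();
        if (isMemoryManaged)
        {
            handle->SetMemoryGroup(interLayerGroup);
        }
        return handle;
    }

    int main()
    {
        auto group     = std::make_shared<MemoryGroup>();
        auto managed   = CreateHandle(group, true);  // pooled with other layers
        auto unmanaged = CreateHandle(group, false); // standalone allocation
        return (managed->m_Group == group && !unmanaged->m_Group) ? 0 : 1;
    }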
\ No newline at end of file diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp deleted file mode 100644 index 4bb2e2a8ce..0000000000 --- a/src/backends/cl/ClWorkloadFactory.cpp +++ /dev/null @@ -1,544 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#include "ClWorkloadFactory.hpp" -#include "ClBackendId.hpp" - -#include <Layer.hpp> - -#include <armnn/Exceptions.hpp> -#include <armnn/Utils.hpp> - -#include <backendsCommon/CpuTensorHandle.hpp> -#include <backendsCommon/MakeWorkloadHelper.hpp> -#include <backendsCommon/MemCopyWorkload.hpp> -#include <backendsCommon/MemImportWorkload.hpp> - -#include <cl/ClTensorHandle.hpp> -#include <cl/workloads/ClWorkloads.hpp> -#include <cl/workloads/ClWorkloadUtils.hpp> - -#include <arm_compute/core/CL/CLKernelLibrary.h> -#include <arm_compute/runtime/CL/CLBufferAllocator.h> -#include <arm_compute/runtime/CL/CLScheduler.h> - -#include <boost/core/ignore_unused.hpp> -#include <boost/polymorphic_cast.hpp> -#include <boost/format.hpp> - -namespace armnn -{ - -namespace -{ -static const BackendId s_Id{ClBackendId()}; -} - -bool ClWorkloadFactory::IsLayerSupported(const Layer& layer, - Optional<DataType> dataType, - std::string& outReasonIfUnsupported) -{ - return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported); -} - -const BackendId& ClWorkloadFactory::GetBackendId() const -{ - return s_Id; -} - -template <typename FloatWorkload, typename Uint8Workload, typename QueueDescriptorType, typename... Args> -std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor, - const WorkloadInfo& info, - Args&&... args) -{ - try - { - return MakeWorkloadHelper<FloatWorkload, Uint8Workload>(descriptor, info, std::forward<Args>(args)...); - } - catch (const cl::Error& clError) - { - throw WrapClError(clError, CHECK_LOCATION()); - } -} - -template <typename Workload, typename QueueDescriptorType, typename... Args> -std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor, - const WorkloadInfo& info, - Args&&... 
args) -{ - try - { - return std::make_unique<Workload>(descriptor, info, std::forward<Args>(args)...); - } - catch (const cl::Error& clError) - { - throw WrapClError(clError, CHECK_LOCATION()); - } -} - -ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager) - : m_MemoryManager(memoryManager) -{ -} - -std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo, - const bool IsMemoryManaged) const -{ - boost::ignore_unused(IsMemoryManaged); - std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo); - tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup()); - - return tensorHandle; -} - -std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout, - const bool IsMemoryManaged) const -{ - boost::ignore_unused(IsMemoryManaged); - std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo, dataLayout); - tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup()); - - return tensorHandle; -} - -std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, - TensorShape const& subTensorShape, - unsigned int const* subTensorOrigin) const -{ - arm_compute::Coordinates coords; - arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape); - - coords.set_num_dimensions(subTensorShape.GetNumDimensions()); - for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++) - { - // Arm compute indexes tensor coords in reverse order. - unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1; - coords.set(i, boost::numeric_cast<int>(subTensorOrigin[revertedIndex])); - } - - const arm_compute::TensorShape parentShape = armcomputetensorutils::BuildArmComputeTensorShape(parent.GetShape()); - if (!::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, parentShape, coords, shape)) - { - return nullptr; - } - - return std::make_unique<ClSubTensorHandle>( - boost::polymorphic_downcast<IClTensorHandle*>(&parent), shape, coords); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateAbs(const AbsQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - boost::ignore_unused(descriptor); - - ElementwiseUnaryQueueDescriptor elementwiseUnaryDescriptor; - elementwiseUnaryDescriptor.m_Parameters = ElementwiseUnaryDescriptor(UnaryOperation::Abs); - - return CreateElementwiseUnary(elementwiseUnaryDescriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClActivationWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClAdditionWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateArgMinMax(const ArgMinMaxQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return std::make_unique<ClArgMinMaxWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateBatchNormalization( - const BatchNormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> 
ClWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClBatchToSpaceNdWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateComparison(const ComparisonQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - if (descriptor.m_Parameters.m_Operation == ComparisonOperation::Greater) - { - GreaterQueueDescriptor greaterQueueDescriptor; - greaterQueueDescriptor.m_Inputs = descriptor.m_Inputs; - greaterQueueDescriptor.m_Outputs = descriptor.m_Outputs; - - return MakeWorkload<ClGreaterFloat32Workload, ClGreaterUint8Workload>(greaterQueueDescriptor, info); - } - return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConcat(const ConcatQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClConcatWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClConstantWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp16ToFp32( - const ConvertFp16ToFp32QueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClConvertFp16ToFp32Workload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp32ToFp16( - const ConvertFp32ToFp16QueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClConvertFp32ToFp16Workload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClConvolution2dWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager()); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClDepthToSpaceWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthwiseConvolution2d( - const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClDepthwiseConvolutionWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDequantize(const DequantizeQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClDequantizeWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDetectionPostProcess( - const DetectionPostProcessQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClDivisionFloatWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateElementwiseUnary(const ElementwiseUnaryQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - if (descriptor.m_Parameters.m_Operation == UnaryOperation::Abs) - { - AbsQueueDescriptor absQueueDescriptor; 
- absQueueDescriptor.m_Inputs = descriptor.m_Inputs; - absQueueDescriptor.m_Outputs = descriptor.m_Outputs; - - return MakeWorkload<ClAbsWorkload>(absQueueDescriptor, info); - } - else if (descriptor.m_Parameters.m_Operation == UnaryOperation::Rsqrt) - { - RsqrtQueueDescriptor rsqrtQueueDescriptor; - rsqrtQueueDescriptor.m_Inputs = descriptor.m_Inputs; - rsqrtQueueDescriptor.m_Outputs = descriptor.m_Outputs; - - return MakeWorkload<ClRsqrtWorkload>(rsqrtQueueDescriptor, info); - } - return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateEqual(const EqualQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - boost::ignore_unused(descriptor); - - ComparisonQueueDescriptor comparisonDescriptor; - comparisonDescriptor.m_Parameters = ComparisonDescriptor(ComparisonOperation::Equal); - - return CreateComparison(comparisonDescriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClFullyConnectedWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager()); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateGather(const GatherQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateGreater(const GreaterQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - boost::ignore_unused(descriptor); - - ComparisonQueueDescriptor comparisonDescriptor; - comparisonDescriptor.m_Parameters = ComparisonDescriptor(ComparisonOperation::Greater); - - return CreateComparison(comparisonDescriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return std::make_unique<CopyMemGenericWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateInstanceNormalization( - const InstanceNormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClInstanceNormalizationWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMaximum(const MaximumQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClMaximumWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClMeanWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0]) - { - throw 
InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemCopy workload"); - } - - return MakeWorkload<CopyMemGenericWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0]) - { - throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemImport workload"); - } - - return std::make_unique<ImportMemGenericWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return CreateConcat(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMinimum(const MinimumQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClMinimumWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClMultiplicationWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return std::make_unique<CopyMemGenericWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClPadWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClPermuteWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClPooling2dWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePrelu(const PreluQueueDescriptor &descriptor, - const WorkloadInfo &info) const -{ - return MakeWorkload<ClPreluWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQuantize(const QuantizeQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClQuantizeWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQuantizedLstm(const QuantizedLstmQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClQuantizedLstmWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClReshapeWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClResizeWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> 
ClWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - ResizeQueueDescriptor resizeDescriptor; - resizeDescriptor.m_Inputs = descriptor.m_Inputs; - resizeDescriptor.m_Outputs = descriptor.m_Outputs; - - resizeDescriptor.m_Parameters.m_Method = ResizeMethod::Bilinear; - resizeDescriptor.m_Parameters.m_DataLayout = descriptor.m_Parameters.m_DataLayout; - resizeDescriptor.m_Parameters.m_TargetHeight = descriptor.m_Parameters.m_TargetHeight; - resizeDescriptor.m_Parameters.m_TargetWidth = descriptor.m_Parameters.m_TargetWidth; - - return CreateResize(resizeDescriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateRsqrt(const RsqrtQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - boost::ignore_unused(descriptor); - - ElementwiseUnaryQueueDescriptor elementwiseUnaryDescriptor; - elementwiseUnaryDescriptor.m_Parameters = ElementwiseUnaryDescriptor(UnaryOperation::Rsqrt); - - return CreateElementwiseUnary(elementwiseUnaryDescriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSlice(const SliceQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClSliceWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClSoftmaxFloatWorkload, ClSoftmaxUint8Workload>(descriptor, info, - m_MemoryManager->GetIntraLayerManager()); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClSpaceToBatchNdWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSpaceToDepth(const SpaceToDepthQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClSpaceToDepthWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClSplitterWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateStack(const StackQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClStackWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClStridedSliceWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClSubtractionWorkload>(descriptor, info); -} - -std::unique_ptr<IWorkload> ClWorkloadFactory::CreateTransposeConvolution2d( - const TransposeConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload<ClTransposeConvolution2dWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager()); -} - -} // namespace armnn diff --git a/src/backends/cl/ClWorkloadFactory.hpp b/src/backends/cl/ClWorkloadFactory.hpp deleted file mode 100644 index 980be9192e..0000000000 --- a/src/backends/cl/ClWorkloadFactory.hpp +++ /dev/null @@ -1,230 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#pragma once - -#include <armnn/IRuntime.hpp> -#include <armnn/Optional.hpp> - -#include <backendsCommon/WorkloadFactoryBase.hpp> -#include <aclCommon/BaseMemoryManager.hpp> - -namespace armnn -{ - -// ARM Compute OpenCL workload factory. -class ClWorkloadFactory : public WorkloadFactoryBase -{ -public: - ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager); - - const BackendId& GetBackendId() const override; - - static bool IsLayerSupported(const Layer& layer, - Optional<DataType> dataType, - std::string& outReasonIfUnsupported); - - bool SupportsSubTensors() const override { return true; } - - std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent, - TensorShape const& subTensorShape, - unsigned int const* subTensorOrigin) const override; - - std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, - const bool IsMemoryManaged = true) const override; - - std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout, - const bool IsMemoryManaged = true) const override; - - ARMNN_DEPRECATED_MSG("Use CreateElementwiseUnary instead") - std::unique_ptr<IWorkload> CreateAbs(const AbsQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateActivation(const ActivationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateAddition(const AdditionQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateArgMinMax(const ArgMinMaxQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateComparison(const ComparisonQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateConcat(const ConcatQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateConstant(const ConstantQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateDebug(const DebugQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateDepthwiseConvolution2d(const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateDequantize(const DequantizeQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateDetectionPostProcess(const DetectionPostProcessQueueDescriptor& descriptor, - const 
WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateDivision(const DivisionQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateElementwiseUnary(const ElementwiseUnaryQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateEqual(const EqualQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateGather(const GatherQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateGreater(const GreaterQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateInstanceNormalization(const InstanceNormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateLstm(const LstmQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateMaximum(const MaximumQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateMean(const MeanQueueDescriptor& descriptor, - const WorkloadInfo& Info) const override; - - std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateMemImport(const MemImportQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - ARMNN_DEPRECATED_MSG("Use CreateConcat instead") - std::unique_ptr<IWorkload> CreateMerger(const MergerQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateMinimum(const MinimumQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateNormalization(const NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateOutput(const OutputQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreatePad(const PadQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreatePermute(const PermuteQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreatePrelu(const PreluQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateQuantize(const QuantizeQueueDescriptor& descriptor, - const WorkloadInfo& 
info) const override; - - std::unique_ptr<IWorkload> CreateQuantizedLstm(const QuantizedLstmQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateResize(const ResizeQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - ARMNN_DEPRECATED_MSG("Use CreateResize instead") - std::unique_ptr<IWorkload> CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - ARMNN_DEPRECATED_MSG("Use CreateElementwiseUnary instead") - std::unique_ptr<IWorkload> CreateRsqrt(const RsqrtQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateSlice(const SliceQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateSpaceToDepth(const SpaceToDepthQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateSplitter(const SplitterQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateStack(const StackQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateSubtraction(const SubtractionQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - std::unique_ptr<IWorkload> CreateTransposeConvolution2d(const TransposeConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - -private: - template<typename FloatWorkload, typename Uint8Workload, typename QueueDescriptorType, typename... Args> - static std::unique_ptr<IWorkload> MakeWorkload(const QueueDescriptorType& descriptor, - const WorkloadInfo& info, - Args&&... args); - - template <typename Workload, typename QueueDescriptorType, typename... Args> - static std::unique_ptr<IWorkload> MakeWorkload(const QueueDescriptorType& descriptor, - const WorkloadInfo& info, - Args&&... args); - - mutable std::shared_ptr<ClMemoryManager> m_MemoryManager; -}; - -} // namespace armnn diff --git a/src/backends/cl/OpenClTimer.cpp b/src/backends/cl/OpenClTimer.cpp deleted file mode 100644 index ee3c114ba0..0000000000 --- a/src/backends/cl/OpenClTimer.cpp +++ /dev/null @@ -1,108 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "OpenClTimer.hpp" - -#include <string> -#include <sstream> - -#include <boost/core/ignore_unused.hpp> - -namespace armnn -{ - -OpenClTimer::OpenClTimer() -{ -} - -void OpenClTimer::Start() -{ - m_Kernels.clear(); - - auto interceptor = [this]( cl_command_queue command_queue, - cl_kernel kernel, - cl_uint work_dim, - const size_t *gwo, - const size_t *gws, - const size_t *lws, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) - { - boost::ignore_unused(event); - cl_int retVal = 0; - - // Get the name of the kernel - cl::Kernel retainedKernel(kernel, true); - std::stringstream ss; - ss << retainedKernel.getInfo<CL_KERNEL_FUNCTION_NAME>(); - - // Embed workgroup sizes into the name - if(gws != nullptr) - { - ss << " GWS[" << gws[0] << "," << gws[1] << "," << gws[2] << "]"; - } - if(lws != nullptr) - { - ss << " LWS[" << lws[0] << "," << lws[1] << "," << lws[2] << "]"; - } - - cl_event customEvent; - - // Forward to original OpenCl function - retVal = m_OriginalEnqueueFunction( command_queue, - kernel, - work_dim, - gwo, - gws, - lws, - num_events_in_wait_list, - event_wait_list, - &customEvent); - - // Store the Kernel info for later GetMeasurements() call - m_Kernels.emplace_back(ss.str(), customEvent); - - return retVal; - }; - - m_OriginalEnqueueFunction = CLSymbols::get().clEnqueueNDRangeKernel_ptr; - CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor; -} - -void OpenClTimer::Stop() -{ - CLSymbols::get().clEnqueueNDRangeKernel_ptr = m_OriginalEnqueueFunction; -} - -std::vector<Measurement> OpenClTimer::GetMeasurements() const -{ - std::vector<Measurement> measurements; - - cl_command_queue_properties clQueueProperties = CLScheduler::get().queue().getInfo<CL_QUEUE_PROPERTIES>(); - - int idx = 0; - for (auto& kernel : m_Kernels) - { - std::string name = std::string(this->GetName()) + "/" + std::to_string(idx++) + ": " + kernel.m_Name; - - double timeUs = 0.0; - if((clQueueProperties & CL_QUEUE_PROFILING_ENABLE) != 0) - { - // Wait for the event to finish before accessing profile results. - kernel.m_Event.wait(); - - cl_ulong start = kernel.m_Event.getProfilingInfo<CL_PROFILING_COMMAND_START>(); - cl_ulong end = kernel.m_Event.getProfilingInfo<CL_PROFILING_COMMAND_END>(); - timeUs = static_cast<double>(end - start) / 1000.0; - } - - measurements.emplace_back(name, timeUs, Measurement::Unit::TIME_US); - } - - return measurements; -} - -} //namespace armnn diff --git a/src/backends/cl/OpenClTimer.hpp b/src/backends/cl/OpenClTimer.hpp deleted file mode 100644 index 5539e885d8..0000000000 --- a/src/backends/cl/OpenClTimer.hpp +++ /dev/null @@ -1,59 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <Instrument.hpp> - -#include <arm_compute/runtime/CL/CLScheduler.h> -#include <arm_compute/core/CL/OpenCL.h> - -#include <vector> -#include <list> - -namespace armnn -{ - -/// OpenClTimer instrument that times all OpenCl kernels executed between calls to Start() and Stop(). -class OpenClTimer : public Instrument -{ -public: - OpenClTimer(); - ~OpenClTimer() = default; - - /// Start the OpenCl timer - void Start() override; - - /// Stop the OpenCl timer - void Stop() override; - - /// Get the name of the timer - /// \return Name of the timer - const char* GetName() const override { return "OpenClKernelTimer"; } - - /// Get the recorded measurements. This will be a list of the execution durations for all the OpenCl kernels. 
- /// \return Recorded measurements - std::vector<Measurement> GetMeasurements() const override; - -private: - using CLScheduler = arm_compute::CLScheduler; - using CLSymbols = arm_compute::CLSymbols; - using ClEvent = cl::Event; - using ClEnqueueFunc = decltype(CLSymbols::clEnqueueNDRangeKernel_ptr); - - /// Stores info about the OpenCl kernel - struct KernelInfo - { - KernelInfo(const std::string& name, cl_event& event) : m_Name(name), m_Event(event) {} - - std::string m_Name; - ClEvent m_Event; - }; - - std::list<KernelInfo> m_Kernels; ///< List of all kernels executed - ClEnqueueFunc m_OriginalEnqueueFunction; ///< Keep track of original OpenCl function -}; - -} //namespace armnn
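Two details of OpenClTimer are worth spelling out. First, the CL_PROFILING_COMMAND_START/END counters are reported in nanoseconds, so the (end - start) / 1000.0 in GetMeasurements yields microseconds, matching Measurement::Unit::TIME_US. Second, Start() and Stop() work by swapping the Compute Library's clEnqueueNDRangeKernel function pointer for a wrapper that records each call and then forwards to the original. A minimal sketch of that swap-and-forward pattern, with hypothetical names and no OpenCL dependency:

    #include <functional>
    #include <string>
    #include <vector>

    // Hypothetical global dispatch pointer, playing the role of
    // CLSymbols::get().clEnqueueNDRangeKernel_ptr.
    using EnqueueFunc = std::function<int(const std::string& kernelName)>;
    static EnqueueFunc g_Enqueue = [](const std::string&) { return 0; };

    int main()
    {
        std::vector<std::string> recorded; // plays the role of m_Kernels

        // Start(): remember the original function and install the interceptor.
        EnqueueFunc original = g_Enqueue;
        g_Enqueue = [&recorded, original](const std::string& name)
        {
            recorded.push_back(name); // bookkeeping before forwarding
            return original(name);    // forward to the real enqueue
        };

        g_Enqueue("conv2d");
        g_Enqueue("softmax");

        // Stop(): restore the original function pointer.
        g_Enqueue = original;

        return recorded.size() == 2 ? 0 : 1;
    }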
\ No newline at end of file diff --git a/src/backends/cl/backend.cmake b/src/backends/cl/backend.cmake deleted file mode 100644 index 9484b9476d..0000000000 --- a/src/backends/cl/backend.cmake +++ /dev/null @@ -1,14 +0,0 @@ -# -# Copyright © 2017 Arm Ltd. All rights reserved. -# SPDX-License-Identifier: MIT -# - -add_subdirectory(${PROJECT_SOURCE_DIR}/src/backends/cl) -list(APPEND armnnLibraries armnnClBackend) - -if(ARMCOMPUTECL) - list(APPEND armnnLibraries armnnClBackendWorkloads) - list(APPEND armnnUnitTestLibraries armnnClBackendUnitTests) -else() - message(STATUS "CL backend is disabled") -endif() diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk deleted file mode 100644 index 4182b94136..0000000000 --- a/src/backends/cl/backend.mk +++ /dev/null @@ -1,113 +0,0 @@ -# -# Copyright © 2017 ARM Ltd. All rights reserved. -# SPDX-License-Identifier: MIT -# - -# BACKEND_SOURCES contains the list of files to be included -# in the Android build and it is picked up by the Android.mk -# file in the root of ArmNN - -# The variable to enable/disable the CL backend (ARMNN_COMPUTE_CL_ENABLED is declared in android-nn-driver/Android.mk) -ifeq ($(ARMNN_COMPUTE_CL_ENABLED),1) - -# ARMNN_COMPUTE_CL_ENABLED == 1 -# Include the source files for the CL backend - -BACKEND_SOURCES := \ - ClBackend.cpp \ - ClBackendContext.cpp \ - ClContextControl.cpp \ - ClLayerSupport.cpp \ - ClRegistryInitializer.cpp \ - ClTensorHandleFactory.cpp \ - ClWorkloadFactory.cpp \ - OpenClTimer.cpp \ - workloads/ClAbsWorkload.cpp \ - workloads/ClActivationWorkload.cpp \ - workloads/ClAdditionWorkload.cpp \ - workloads/ClArgMinMaxWorkload.cpp \ - workloads/ClBatchNormalizationFloatWorkload.cpp \ - workloads/ClBatchToSpaceNdWorkload.cpp \ - workloads/ClConcatWorkload.cpp \ - workloads/ClConstantWorkload.cpp \ - workloads/ClConvertFp16ToFp32Workload.cpp \ - workloads/ClConvertFp32ToFp16Workload.cpp \ - workloads/ClConvolution2dWorkload.cpp \ - workloads/ClDepthToSpaceWorkload.cpp \ - workloads/ClDepthwiseConvolutionWorkload.cpp \ - workloads/ClDequantizeWorkload.cpp \ - workloads/ClDivisionFloatWorkload.cpp \ - workloads/ClFloorFloatWorkload.cpp \ - workloads/ClFullyConnectedWorkload.cpp \ - workloads/ClGreaterWorkload.cpp \ - workloads/ClInstanceNormalizationWorkload.cpp \ - workloads/ClL2NormalizationFloatWorkload.cpp \ - workloads/ClLstmFloatWorkload.cpp \ - workloads/ClMaximumWorkload.cpp \ - workloads/ClMeanWorkload.cpp \ - workloads/ClMinimumWorkload.cpp \ - workloads/ClMultiplicationWorkload.cpp \ - workloads/ClNormalizationFloatWorkload.cpp \ - workloads/ClPadWorkload.cpp \ - workloads/ClPermuteWorkload.cpp \ - workloads/ClPooling2dWorkload.cpp \ - workloads/ClPreluWorkload.cpp \ - workloads/ClQuantizedLstmWorkload.cpp \ - workloads/ClQuantizeWorkload.cpp \ - workloads/ClReshapeWorkload.cpp \ - workloads/ClResizeWorkload.cpp \ - workloads/ClRsqrtWorkload.cpp \ - workloads/ClSliceWorkload.cpp \ - workloads/ClSoftmaxBaseWorkload.cpp \ - workloads/ClSoftmaxFloatWorkload.cpp \ - workloads/ClSoftmaxUint8Workload.cpp \ - workloads/ClSpaceToBatchNdWorkload.cpp \ - workloads/ClSpaceToDepthWorkload.cpp \ - workloads/ClSplitterWorkload.cpp \ - workloads/ClStackWorkload.cpp \ - workloads/ClStridedSliceWorkload.cpp \ - workloads/ClSubtractionWorkload.cpp \ - workloads/ClTransposeConvolution2dWorkload.cpp -else - -# ARMNN_COMPUTE_CL_ENABLED == 0 -# No source file will be compiled for the CL backend - -BACKEND_SOURCES := - -endif - -# BACKEND_TEST_SOURCES contains the list of files to be included -# in the Android unit 
test build (armnn-tests) and it is picked -# up by the Android.mk file in the root of ArmNN - -# The variable to enable/disable the CL backend (ARMNN_COMPUTE_CL_ENABLED is declared in android-nn-driver/Android.mk) -ifeq ($(ARMNN_COMPUTE_CL_ENABLED),1) - -# ARMNN_COMPUTE_CL_ENABLED == 1 -# Include the source files for the CL backend tests - -BACKEND_TEST_SOURCES := \ - test/ClCreateWorkloadTests.cpp \ - test/ClEndToEndTests.cpp \ - test/ClJsonPrinterTests.cpp \ - test/ClLayerSupportTests.cpp \ - test/ClLayerTests.cpp \ - test/ClOptimizedNetworkTests.cpp \ - test/ClRuntimeTests.cpp \ - test/Fp16SupportTest.cpp \ - test/OpenClTimerTest.cpp - -ifeq ($(ARMNN_REF_ENABLED),1) -BACKEND_TEST_SOURCES += \ - test/ClMemCopyTests.cpp -endif # ARMNN_REF_ENABLED == 1 - -else - -# ARMNN_COMPUTE_CL_ENABLED == 0 -# No source file will be compiled for the CL backend tests - -BACKEND_TEST_SOURCES := - -endif diff --git a/src/backends/cl/test/CMakeLists.txt b/src/backends/cl/test/CMakeLists.txt deleted file mode 100644 index f900fc55ef..0000000000 --- a/src/backends/cl/test/CMakeLists.txt +++ /dev/null @@ -1,30 +0,0 @@ -# -# Copyright © 2017 Arm Ltd. All rights reserved. -# SPDX-License-Identifier: MIT -# - -list(APPEND armnnClBackendUnitTests_sources - ClContextControlFixture.hpp - ClCreateWorkloadTests.cpp - ClEndToEndTests.cpp - ClJsonPrinterTests.cpp - ClLayerSupportTests.cpp - ClLayerTests.cpp - ClOptimizedNetworkTests.cpp - ClRuntimeTests.cpp - ClWorkloadFactoryHelper.hpp - Fp16SupportTest.cpp - OpenClTimerTest.cpp -) - -if (ARMNNREF) - list(APPEND armnnClBackendUnitTests_sources - ClMemCopyTests.cpp - ) -endif() - -add_library(armnnClBackendUnitTests OBJECT ${armnnClBackendUnitTests_sources}) -target_include_directories(armnnClBackendUnitTests PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) -target_include_directories(armnnClBackendUnitTests PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) -target_include_directories(armnnClBackendUnitTests PRIVATE ${PROJECT_SOURCE_DIR}/src/backends) -target_include_directories(armnnClBackendUnitTests PRIVATE ${PROJECT_SOURCE_DIR}/src/profiling) diff --git a/src/backends/cl/test/ClContextControlFixture.hpp b/src/backends/cl/test/ClContextControlFixture.hpp deleted file mode 100644 index 0371c69a0b..0000000000 --- a/src/backends/cl/test/ClContextControlFixture.hpp +++ /dev/null @@ -1,22 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <cl/ClContextControl.hpp> -#include <backendsCommon/WorkloadFactory.hpp> - -template<bool ProfilingEnabled> -struct ClContextControlFixtureBase -{ - // Initialising ClContextControl to ensure OpenCL is loaded correctly for each test case - ClContextControlFixtureBase() - : m_ClContextControl(nullptr, ProfilingEnabled) {} - - armnn::ClContextControl m_ClContextControl; -}; - -using ClContextControlFixture = ClContextControlFixtureBase<false>; -using ClProfilingContextControlFixture = ClContextControlFixtureBase<true>; diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp deleted file mode 100644 index 92e771760f..0000000000 --- a/src/backends/cl/test/ClCreateWorkloadTests.cpp +++ /dev/null @@ -1,1055 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClContextControlFixture.hpp" -#include "ClWorkloadFactoryHelper.hpp" - -#include <backendsCommon/MemCopyWorkload.hpp> - -#include <aclCommon/test/CreateWorkloadClNeon.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include <cl/ClTensorHandle.hpp> -#include <cl/ClWorkloadFactory.hpp> -#include <cl/workloads/ClWorkloads.hpp> -#include <cl/workloads/ClWorkloadUtils.hpp> - -boost::test_tools::predicate_result CompareIClTensorHandleShape(IClTensorHandle* tensorHandle, - std::initializer_list<unsigned int> expectedDimensions) -{ - return CompareTensorHandleShape<IClTensorHandle>(tensorHandle, expectedDimensions); -} - -BOOST_FIXTURE_TEST_SUITE(CreateWorkloadCl, ClContextControlFixture) - -template <armnn::DataType DataType> -static void ClCreateActivationWorkloadTest() -{ - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreateActivationWorkloadTest<ClActivationWorkload, DataType>(factory, graph); - - // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest). - ActivationQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 1})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 1})); -} - -BOOST_AUTO_TEST_CASE(CreateActivationFloatWorkload) -{ - ClCreateActivationWorkloadTest<armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload) -{ - ClCreateActivationWorkloadTest<armnn::DataType::Float16>(); -} - -template <typename WorkloadType, - typename DescriptorType, - typename LayerType, - armnn::DataType DataType> -static void ClCreateElementwiseWorkloadTest() -{ - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph); - - // Checks that inputs/outputs are as we expect them (see definition of CreateElementwiseWorkloadTest). 
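- // The queue descriptor exposes raw ITensorHandle pointers; downcasting to
- // IClTensorHandle lets the test query the CL-side tensor shape directly.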
- DescriptorType queueDescriptor = workload->GetData(); - auto inputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto inputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, {2, 3})); - BOOST_TEST(CompareIClTensorHandleShape(inputHandle2, {2, 3})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3})); -} - -BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload) -{ - ClCreateElementwiseWorkloadTest<ClAdditionWorkload, - AdditionQueueDescriptor, - AdditionLayer, - armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload) -{ - ClCreateElementwiseWorkloadTest<ClAdditionWorkload, - AdditionQueueDescriptor, - AdditionLayer, - armnn::DataType::Float16>(); -} - -BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload) -{ - ClCreateElementwiseWorkloadTest<ClSubtractionWorkload, - SubtractionQueueDescriptor, - SubtractionLayer, - armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateSubtractionFloat16Workload) -{ - ClCreateElementwiseWorkloadTest<ClSubtractionWorkload, - SubtractionQueueDescriptor, - SubtractionLayer, - armnn::DataType::Float16>(); -} - -BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkloadTest) -{ - ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload, - MultiplicationQueueDescriptor, - MultiplicationLayer, - armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16WorkloadTest) -{ - ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload, - MultiplicationQueueDescriptor, - MultiplicationLayer, - armnn::DataType::Float16>(); -} - -BOOST_AUTO_TEST_CASE(CreateMultiplicationUint8WorkloadTest) -{ - ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload, - MultiplicationQueueDescriptor, - MultiplicationLayer, - armnn::DataType::QAsymmU8>(); -} - -BOOST_AUTO_TEST_CASE(CreateDivisionFloatWorkloadTest) -{ - ClCreateElementwiseWorkloadTest<ClDivisionFloatWorkload, - DivisionQueueDescriptor, - DivisionLayer, - armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateDivisionFloat16WorkloadTest) -{ - ClCreateElementwiseWorkloadTest<ClDivisionFloatWorkload, - DivisionQueueDescriptor, - DivisionLayer, - armnn::DataType::Float16>(); -} - -template <typename WorkloadType, - typename DescriptorType, - armnn::DataType DataType> -static void ClCreateElementwiseUnaryWorkloadTest(armnn::UnaryOperation op) -{ - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreateElementwiseUnaryWorkloadTest<WorkloadType, DescriptorType, DataType>(factory, graph, op); - - DescriptorType queueDescriptor = workload->GetData(); - - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3})); -} - -BOOST_AUTO_TEST_CASE(CreateRsqrtFloat32WorkloadTest) -{ - ClCreateElementwiseUnaryWorkloadTest<ClRsqrtWorkload, RsqrtQueueDescriptor, armnn::DataType::Float32>( - UnaryOperation::Rsqrt); -} - -template <typename BatchNormalizationWorkloadType, armnn::DataType DataType> -static void ClCreateBatchNormalizationWorkloadTest(DataLayout dataLayout) -{ - Graph graph; - 
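- // As in the other Create*WorkloadTest helpers, the factory is backed by a fresh CL memory manager.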
ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType> - (factory, graph, dataLayout); - - // Checks that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest). - BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - switch (dataLayout) - { - case DataLayout::NHWC: - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 })); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4, 4, 3 })); - break; - default: // NCHW - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 })); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 3, 4, 4 })); - } -} - -BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNchwWorkload) -{ - ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload, - armnn::DataType::Float32>(DataLayout::NCHW); -} - -BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NchwWorkload) -{ - ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload, - armnn::DataType::Float16>(DataLayout::NCHW); -} - -BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNhwcWorkload) -{ - ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload, - armnn::DataType::Float32>(DataLayout::NHWC); -} - -BOOST_AUTO_TEST_CASE(CreateBatchNormalizationNhwcFloat16NhwcWorkload) -{ - ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload, - armnn::DataType::Float16>(DataLayout::NHWC); -} - -BOOST_AUTO_TEST_CASE(CreateConvertFp16ToFp32Workload) -{ - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreateConvertFp16ToFp32WorkloadTest<ClConvertFp16ToFp32Workload>(factory, graph); - - ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3})); - BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16)); - BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32)); -} - -BOOST_AUTO_TEST_CASE(CreateConvertFp32ToFp16Workload) -{ - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreateConvertFp32ToFp16WorkloadTest<ClConvertFp32ToFp16Workload>(factory, graph); - - ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3})); - BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32)); - BOOST_TEST((outputHandle->GetTensor().info()->data_type() == 
arm_compute::DataType::F16)); -} - -template <typename Convolution2dWorkloadType, typename armnn::DataType DataType> -static void ClConvolution2dWorkloadTest(DataLayout dataLayout) -{ - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory, - graph, - dataLayout); - - TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 3, 8, 16}) - : std::initializer_list<unsigned int>({2, 8, 16, 3}); - TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 2, 2, 10}) - : std::initializer_list<unsigned int>({2, 2, 10, 2}); - - // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest). - Convolution2dQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST((inputHandle->GetShape() == inputShape)); - BOOST_TEST((outputHandle->GetShape() == outputShape)); -} - -BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNchwWorkload) -{ - ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NCHW); -} - -BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload) -{ - ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NHWC); -} - -BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NchwWorkload) -{ - ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NCHW); -} - -BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload) -{ - ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NHWC); -} - -template <typename DepthwiseConvolutionWorkloadType, typename armnn::DataType DataType> -static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout) -{ - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreateDepthwiseConvolution2dWorkloadTest<DepthwiseConvolutionWorkloadType, DataType> - (factory, graph, dataLayout); - - // Checks that inputs/outputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest). - DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 }) - : std::initializer_list<unsigned int>({ 2, 5, 5, 2 }); - TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? 
std::initializer_list<unsigned int>({ 2, 2, 5, 5 }) - : std::initializer_list<unsigned int>({ 2, 5, 5, 2 }); - - BOOST_TEST((inputHandle->GetShape() == inputShape)); - BOOST_TEST((outputHandle->GetShape() == outputShape)); -} - -BOOST_AUTO_TEST_CASE(CreateDepthwiseConvolutionFloat32NhwcWorkload) -{ - ClDepthwiseConvolutionWorkloadTest<ClDepthwiseConvolutionWorkload, DataType::Float32>(DataLayout::NHWC); -} - -template <typename Convolution2dWorkloadType, typename armnn::DataType DataType> -static void ClDirectConvolution2dWorkloadTest() -{ - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreateDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest). - Convolution2dQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 6, 6})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 2, 6, 6})); -} - -BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloatWorkload) -{ - ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloat16Workload) -{ - ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(); -} - -BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dUint8Workload) -{ - ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::QAsymmU8>(); -} - -template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType> -static void ClCreateFullyConnectedWorkloadTest() -{ - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = - CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest). - FullyConnectedQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 1, 4, 5})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 7})); -} - - -BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloatWorkloadTest) -{ - ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16WorkloadTest) -{ - ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float16>(); -} - -template <typename NormalizationWorkloadType, typename armnn::DataType DataType> -static void ClNormalizationWorkloadTest(DataLayout dataLayout) -{ - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout); - - // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest). 
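- // The expected shapes below are layout-dependent: NCHW and NHWC carry the
- // same four dimensions in a different order.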
- NormalizationQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1}) - : std::initializer_list<unsigned int>({3, 1, 5, 5}); - TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1}) - : std::initializer_list<unsigned int>({3, 1, 5, 5}); - - BOOST_TEST((inputHandle->GetShape() == inputShape)); - BOOST_TEST((outputHandle->GetShape() == outputShape)); -} - -BOOST_AUTO_TEST_CASE(CreateNormalizationFloat32NchwWorkload) -{ - ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW); -} - -BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NchwWorkload) -{ - ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW); -} - -BOOST_AUTO_TEST_CASE(CreateNormalizationFloat32NhwcWorkload) -{ - ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC); -} - -BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NhwcWorkload) -{ - ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC); -} - -template <typename armnn::DataType DataType> -static void ClPooling2dWorkloadTest(DataLayout dataLayout) -{ - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreatePooling2dWorkloadTest<ClPooling2dWorkload, DataType>(factory, graph, dataLayout); - - TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 5, 5}) - : std::initializer_list<unsigned int>({3, 5, 5, 2}); - TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 2, 4}) - : std::initializer_list<unsigned int>({3, 2, 4, 2}); - - // Check that inputs/outputs are as we expect them (see definition of CreatePooling2dWorkloadTest). 
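- // The pooling parameters come from CreatePooling2dWorkloadTest: a 5x5 spatial
- // input is pooled down to 2x4 in both layouts.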
- Pooling2dQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - BOOST_TEST((inputHandle->GetShape() == inputShape)); - BOOST_TEST((outputHandle->GetShape() == outputShape)); -} - -BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNchwWorkload) -{ - ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NCHW); -} - -BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNhwcWorkload) -{ - ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NHWC); -} - -BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16NchwWorkload) -{ - ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NCHW); -} - -BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16NhwcWorkload) -{ - ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NHWC); -} - -static void ClCreatePreluWorkloadTest(const armnn::TensorShape& inputShape, - const armnn::TensorShape& alphaShape, - const armnn::TensorShape& outputShape, - armnn::DataType dataType) -{ - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreatePreluWorkloadTest<ClPreluWorkload>(factory, - graph, - inputShape, - alphaShape, - outputShape, - dataType); - - // Checks that outputs and inputs are as we expect them (see definition of CreatePreluWorkloadTest). - PreluQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto alphaHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - BOOST_TEST((inputHandle->GetShape() == inputShape)); - BOOST_TEST((alphaHandle->GetShape() == alphaShape)); - BOOST_TEST((outputHandle->GetShape() == outputShape)); -} - -BOOST_AUTO_TEST_CASE(CreatePreluFloat16Workload) -{ - ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float16); -} - -BOOST_AUTO_TEST_CASE(CreatePreluFloatWorkload) -{ - ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float32); -} - -BOOST_AUTO_TEST_CASE(CreatePreluUint8Workload) -{ - ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::QAsymmU8); -} - -template <typename armnn::DataType DataType> -static void ClCreateReshapeWorkloadTest() -{ - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreateReshapeWorkloadTest<ClReshapeWorkload, DataType>(factory, graph); - - // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest). 
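- // Reshape only rewrites the shape metadata: the {4, 1} input and the {1, 4}
- // output hold the same four elements.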
- ReshapeQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 4})); -} - -BOOST_AUTO_TEST_CASE(CreateReshapeFloatWorkload) -{ - ClCreateReshapeWorkloadTest<armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload) -{ - ClCreateReshapeWorkloadTest<armnn::DataType::Float16>(); -} - -BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload) -{ - ClCreateReshapeWorkloadTest<armnn::DataType::QAsymmU8>(); -} - -template <typename SoftmaxWorkloadType, typename armnn::DataType DataType> -static void ClSoftmaxWorkloadTest() -{ - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph); - - // Checks that inputs/outputs are as we expect them (see definition of ClSoftmaxFloatWorkload). - SoftmaxQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4, 1})); -} - - -BOOST_AUTO_TEST_CASE(CreateSoftmaxFloatWorkloadTest) -{ - ClSoftmaxWorkloadTest<ClSoftmaxFloatWorkload, armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16WorkloadTest) -{ - ClSoftmaxWorkloadTest<ClSoftmaxFloatWorkload, armnn::DataType::Float16>(); -} - -template <typename armnn::DataType DataType> -static void ClSplitterWorkloadTest() -{ - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreateSplitterWorkloadTest<ClSplitterWorkload, DataType>(factory, graph); - - // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest). - SplitterQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {5, 7, 7})); - - auto outputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle1, {2, 7, 7})); - - auto outputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle2, {2, 7, 7})); - - auto outputHandle0 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle0, {1, 7, 7})); -} - -BOOST_AUTO_TEST_CASE(CreateSplitterFloatWorkload) -{ - ClSplitterWorkloadTest<armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateSplitterFloat16Workload) -{ - ClSplitterWorkloadTest<armnn::DataType::Float16>(); -} - -template <typename armnn::DataType DataType> -static void ClSplitterConcatTest() -{ - // Tests that it is possible to decide which output of the splitter layer - // should be linked to which input of the concat layer.
- // We test that it is possible to specify the 0th output - // of the splitter to be the 1st input to the concat and the 1st output of the splitter to be the 0th input - // of the concat. - - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workloads = - CreateSplitterConcatWorkloadTest<ClSplitterWorkload, ClConcatWorkload, DataType> - (factory, graph); - - auto wlSplitter = std::move(workloads.first); - auto wlConcat = std::move(workloads.second); - - //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction. - armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]); - armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]); - armnn::ClSubTensorHandle* mIn0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[0]); - armnn::ClSubTensorHandle* mIn1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[1]); - - BOOST_TEST(sOut0); - BOOST_TEST(sOut1); - BOOST_TEST(mIn0); - BOOST_TEST(mIn1); - - //Flipped order of inputs/outputs. - bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0); - BOOST_TEST(validDataPointers); - - - //Also make sure that the inputs are subtensors of one tensor and the outputs are subtensors of another tensor. - bool validSubTensorParents = (mIn0->GetTensor().parent() == mIn1->GetTensor().parent()) - && (sOut0->GetTensor().parent() == sOut1->GetTensor().parent()); - - BOOST_TEST(validSubTensorParents); -} - -BOOST_AUTO_TEST_CASE(CreateSplitterConcatFloatWorkload) -{ - ClSplitterConcatTest<armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateSplitterConcatFloat16Workload) -{ - ClSplitterConcatTest<armnn::DataType::Float16>(); -} - - -BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs) -{ - // Test that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer. - // We create a splitter with two outputs, and check that each of those outputs is used by two different activation layers. - - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - std::unique_ptr<ClSplitterWorkload> wlSplitter; - std::unique_ptr<ClActivationWorkload> wlActiv0_0; - std::unique_ptr<ClActivationWorkload> wlActiv0_1; - std::unique_ptr<ClActivationWorkload> wlActiv1_0; - std::unique_ptr<ClActivationWorkload> wlActiv1_1; - - CreateSplitterMultipleInputsOneOutputWorkloadTest<ClSplitterWorkload, - ClActivationWorkload, armnn::DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1, - wlActiv1_0, wlActiv1_1); - - //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
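- // dynamic_cast is deliberate here: splitter outputs and activation inputs are
- // expected to be ClSubTensorHandles (views into a parent CL tensor), so the
- // cast yields nullptr if the factory produced a plain tensor instead.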
- armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]); - armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]); - armnn::ClSubTensorHandle* activ0_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]); - armnn::ClSubTensorHandle* activ0_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]); - armnn::ClSubTensorHandle* activ1_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]); - armnn::ClSubTensorHandle* activ1_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]); - - - BOOST_TEST(sOut0); - BOOST_TEST(sOut1); - BOOST_TEST(activ0_0Im); - BOOST_TEST(activ0_1Im); - BOOST_TEST(activ1_0Im); - BOOST_TEST(activ1_1Im); - - bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) && - (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im); - - BOOST_TEST(validDataPointers); -} - -#if defined(ARMNNREF_ENABLED) - -// This test unit needs the reference backend; it's not available if the reference backend is not built - -BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsCl) -{ - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - CreateMemCopyWorkloads<IClTensorHandle>(factory); -} - -#endif - -template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType> -static void ClL2NormalizationWorkloadTest(DataLayout dataLayout) -{ - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = - CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout); - - // Checks that inputs/outputs are as we expect them (see definition of CreateL2NormalizationWorkloadTest). - L2NormalizationQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 }) - : std::initializer_list<unsigned int>({ 5, 50, 67, 20 }); - TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
std::initializer_list<unsigned int>({ 5, 20, 50, 67 }) - : std::initializer_list<unsigned int>({ 5, 50, 67, 20 }); - - BOOST_TEST((inputHandle->GetShape() == inputShape)); - BOOST_TEST((outputHandle->GetShape() == outputShape)); -} - -BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloatNchwWorkload) -{ - ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW); -} - -BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloatNhwcWorkload) -{ - ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC); -} - -BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NchwWorkload) -{ - ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW); -} - -BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NhwcWorkload) -{ - ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC); -} - -template <typename LstmWorkloadType> -static void ClCreateLstmWorkloadTest() -{ - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph); - - LstmQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]); - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 2 })); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4 })); -} - -BOOST_AUTO_TEST_CASE(CreateLSTMWorkloadFloatWorkload) -{ - ClCreateLstmWorkloadTest<ClLstmFloatWorkload>(); -} - -template <typename ResizeWorkloadType, typename armnn::DataType DataType> -static void ClResizeWorkloadTest(DataLayout dataLayout) -{ - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreateResizeBilinearWorkloadTest<ResizeWorkloadType, DataType>(factory, graph, dataLayout); - - auto queueDescriptor = workload->GetData(); - - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - switch (dataLayout) - { - case DataLayout::NHWC: - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 })); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 2, 2, 3 })); - break; - case DataLayout::NCHW: - default: - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 })); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 3, 2, 2 })); - } -} - -BOOST_AUTO_TEST_CASE(CreateResizeFloat32NchwWorkload) -{ - ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NCHW); -} - -BOOST_AUTO_TEST_CASE(CreateResizeFloat16NchwWorkload) -{ - ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NCHW); -} - -BOOST_AUTO_TEST_CASE(CreateResizeUint8NchwWorkload) -{ - ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NCHW); -} - -BOOST_AUTO_TEST_CASE(CreateResizeFloat32NhwcWorkload) -{ - ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NHWC); -} - -BOOST_AUTO_TEST_CASE(CreateResizeFloat16NhwcWorkload) -{ - ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NHWC); -} - 
-BOOST_AUTO_TEST_CASE(CreateResizeUint8NhwcWorkload) -{ - ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NHWC); -} - -template <typename MeanWorkloadType, typename armnn::DataType DataType> -static void ClMeanWorkloadTest() -{ - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreateMeanWorkloadTest<MeanWorkloadType, DataType>(factory, graph); - - // Checks that inputs/outputs are as we expect them (see definition of CreateMeanWorkloadTest). - MeanQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - // The first dimension (batch size) in both input and output is singular, so it has been reduced by ACL. - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 1, 3, 7, 4 })); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 1, 4 })); -} - -BOOST_AUTO_TEST_CASE(CreateMeanFloat32Workload) -{ - ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateMeanFloat16Workload) -{ - ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float16>(); -} - -BOOST_AUTO_TEST_CASE(CreateMeanUint8Workload) -{ - ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::QAsymmU8>(); -} - -template <typename ConcatWorkloadType, armnn::DataType DataType> -static void ClCreateConcatWorkloadTest(std::initializer_list<unsigned int> outputShape, - unsigned int concatAxis) -{ - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis); - - ConcatQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle0 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto inputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - BOOST_TEST(CompareIClTensorHandleShape(inputHandle0, { 2, 3, 2, 5 })); - BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, { 2, 3, 2, 5 })); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape)); -} - -BOOST_AUTO_TEST_CASE(CreateConcatDim0Float32Workload) -{ - ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 4, 3, 2, 5 }, 0); -} - -BOOST_AUTO_TEST_CASE(CreateConcatDim1Float32Workload) -{ - ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 6, 2, 5 }, 1); -} - -BOOST_AUTO_TEST_CASE(CreateConcatDim3Float32Workload) -{ - ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 3, 2, 10 }, 3); -} - -BOOST_AUTO_TEST_CASE(CreateConcatDim0Uint8Workload) -{ - ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 4, 3, 2, 5 }, 0); -} - -BOOST_AUTO_TEST_CASE(CreateConcatDim1Uint8Workload) -{ - ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 6, 2, 5 }, 1); -} - -BOOST_AUTO_TEST_CASE(CreateConcatDim3Uint8Workload) -{ - ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 3, 2, 10 }, 3); -} - -template <typename SpaceToDepthWorkloadType, typename armnn::DataType DataType> -static void ClSpaceToDepthWorkloadTest() -{ - Graph graph; -
ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph); - - SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 1, 2, 2, 1 })); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 1, 1, 1, 4 })); -} - -BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat32Workload) -{ - ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float32>(); -} - -BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat16Workload) -{ - ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float16>(); -} - -BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQAsymm8Workload) -{ - ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QAsymmU8>(); -} - -BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQSymm16Workload) -{ - ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QSymmS16>(); -} - -template <armnn::DataType DataType> -static void ClCreateStackWorkloadTest(const std::initializer_list<unsigned int>& inputShape, - const std::initializer_list<unsigned int>& outputShape, - unsigned int axis, - unsigned int numInputs) -{ - armnn::Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreateStackWorkloadTest<ClStackWorkload, DataType>(factory, - graph, - TensorShape(inputShape), - TensorShape(outputShape), - axis, - numInputs); - - // Check inputs and output are as expected - StackQueueDescriptor queueDescriptor = workload->GetData(); - for (unsigned int i = 0; i < numInputs; ++i) - { - auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[i]); - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, inputShape)); - } - auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape)); -} - -BOOST_AUTO_TEST_CASE(CreateStackFloat32Workload) -{ - ClCreateStackWorkloadTest<armnn::DataType::Float32>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2); -} - -BOOST_AUTO_TEST_CASE(CreateStackFloat16Workload) -{ - ClCreateStackWorkloadTest<armnn::DataType::Float16>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2); -} - -BOOST_AUTO_TEST_CASE(CreateStackUint8Workload) -{ - ClCreateStackWorkloadTest<armnn::DataType::QAsymmU8>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2); -} - -template <typename QuantizedLstmWorkloadType> -static void ClCreateQuantizedLstmWorkloadTest() -{ - using namespace armnn::armcomputetensorutils; - using boost::polymorphic_downcast; - - Graph graph; - ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - - auto workload = CreateQuantizedLstmWorkloadTest<QuantizedLstmWorkloadType>(factory, graph); - - QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData(); - - IAclTensorHandle* inputHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]); - BOOST_TEST((inputHandle->GetShape() == TensorShape({2, 2}))); - BOOST_TEST((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8)); - - IAclTensorHandle* cellStateInHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]); 
- BOOST_TEST((cellStateInHandle->GetShape() == TensorShape({2, 4}))); - BOOST_TEST((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16)); - - IAclTensorHandle* outputStateInHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]); - BOOST_TEST((outputStateInHandle->GetShape() == TensorShape({2, 4}))); - BOOST_TEST((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8)); - - IAclTensorHandle* cellStateOutHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST((cellStateOutHandle->GetShape() == TensorShape({2, 4}))); - BOOST_TEST((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16)); - - IAclTensorHandle* outputStateOutHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]); - BOOST_TEST((outputStateOutHandle->GetShape() == TensorShape({2, 4}))); - BOOST_TEST((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8)); -} - -BOOST_AUTO_TEST_CASE(CreateQuantizedLstmWorkload) -{ - ClCreateQuantizedLstmWorkloadTest<ClQuantizedLstmWorkload>(); -} - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/cl/test/ClEndToEndTests.cpp b/src/backends/cl/test/ClEndToEndTests.cpp deleted file mode 100644 index 10abcc9fe6..0000000000 --- a/src/backends/cl/test/ClEndToEndTests.cpp +++ /dev/null @@ -1,451 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include <backendsCommon/test/EndToEndTestImpl.hpp> - -#include <backendsCommon/test/ArgMinMaxEndToEndTestImpl.hpp> -#include <backendsCommon/test/ComparisonEndToEndTestImpl.hpp> -#include <backendsCommon/test/ConcatEndToEndTestImpl.hpp> -#include <backendsCommon/test/DepthToSpaceEndToEndTestImpl.hpp> -#include <backendsCommon/test/DequantizeEndToEndTestImpl.hpp> -#include <backendsCommon/test/ElementwiseUnaryEndToEndTestImpl.hpp> -#include <backendsCommon/test/InstanceNormalizationEndToEndTestImpl.hpp> -#include <backendsCommon/test/PreluEndToEndTestImpl.hpp> -#include <backendsCommon/test/QuantizedLstmEndToEndTestImpl.hpp> -#include <backendsCommon/test/SpaceToDepthEndToEndTestImpl.hpp> -#include <backendsCommon/test/SplitterEndToEndTestImpl.hpp> -#include <backendsCommon/test/TransposeConvolution2dEndToEndTestImpl.hpp> - -#include <boost/test/unit_test.hpp> - -BOOST_AUTO_TEST_SUITE(ClEndToEnd) - -std::vector<armnn::BackendId> defaultBackends = {armnn::Compute::GpuAcc}; - -// Abs -BOOST_AUTO_TEST_CASE(ClAbsEndToEndTestFloat32) -{ - std::vector<float> expectedOutput = - { - 1.f, 1.f, 1.f, 1.f, 5.f, 5.f, 5.f, 5.f, - 3.f, 3.f, 3.f, 3.f, 4.f, 4.f, 4.f, 4.f - }; - - ElementwiseUnarySimpleEndToEnd<armnn::DataType::Float32>(defaultBackends, - UnaryOperation::Abs, - expectedOutput); -} - -// Constant -BOOST_AUTO_TEST_CASE(ConstantUsage_Cl_Float32) -{ - ConstantUsageFloat32Test(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClConcatEndToEndDim0Test) -{ - ConcatDim0EndToEnd<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClConcatEndToEndDim0Uint8Test) -{ - ConcatDim0EndToEnd<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClConcatEndToEndDim1Test) -{ - ConcatDim1EndToEnd<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClConcatEndToEndDim1Uint8Test) -{ - ConcatDim1EndToEnd<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClConcatEndToEndDim3Test) -{ - ConcatDim3EndToEnd<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClConcatEndToEndDim3Uint8Test) -{ - 
ConcatDim3EndToEnd<armnn::DataType::QAsymmU8>(defaultBackends); -} - -// DepthToSpace -BOOST_AUTO_TEST_CASE(DephtToSpaceEndToEndNchwFloat32) -{ - DepthToSpaceEndToEnd<armnn::DataType::Float32>(defaultBackends, armnn::DataLayout::NCHW); -} - -BOOST_AUTO_TEST_CASE(DephtToSpaceEndToEndNchwFloat16) -{ - DepthToSpaceEndToEnd<armnn::DataType::Float16>(defaultBackends, armnn::DataLayout::NCHW); -} - -BOOST_AUTO_TEST_CASE(DephtToSpaceEndToEndNchwUint8) -{ - DepthToSpaceEndToEnd<armnn::DataType::QAsymmU8>(defaultBackends, armnn::DataLayout::NCHW); -} - -BOOST_AUTO_TEST_CASE(DephtToSpaceEndToEndNchwInt16) -{ - DepthToSpaceEndToEnd<armnn::DataType::QSymmS16>(defaultBackends, armnn::DataLayout::NCHW); -} - -BOOST_AUTO_TEST_CASE(DephtToSpaceEndToEndNhwcFloat32) -{ - DepthToSpaceEndToEnd<armnn::DataType::Float32>(defaultBackends, armnn::DataLayout::NHWC); -} - -BOOST_AUTO_TEST_CASE(DephtToSpaceEndToEndNhwcFloat16) -{ - DepthToSpaceEndToEnd<armnn::DataType::Float16>(defaultBackends, armnn::DataLayout::NHWC); -} - -BOOST_AUTO_TEST_CASE(DephtToSpaceEndToEndNhwcUint8) -{ - DepthToSpaceEndToEnd<armnn::DataType::QAsymmU8>(defaultBackends, armnn::DataLayout::NHWC); -} - -BOOST_AUTO_TEST_CASE(DephtToSpaceEndToEndNhwcInt16) -{ - DepthToSpaceEndToEnd<armnn::DataType::QSymmS16>(defaultBackends, armnn::DataLayout::NHWC); -} - -// Dequantize -BOOST_AUTO_TEST_CASE(DequantizeEndToEndSimpleTest) -{ - DequantizeEndToEndSimple<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(DequantizeEndToEndOffsetTest) -{ - DequantizeEndToEndOffset<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClGreaterSimpleEndToEndTest) -{ - const std::vector<uint8_t> expectedOutput({ 0, 0, 0, 0, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0 }); - - ComparisonSimpleEndToEnd<armnn::DataType::Float32>(defaultBackends, - ComparisonOperation::Greater, - expectedOutput); -} - -BOOST_AUTO_TEST_CASE(ClGreaterSimpleEndToEndUint8Test) -{ - const std::vector<uint8_t> expectedOutput({ 0, 0, 0, 0, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0 }); - - ComparisonSimpleEndToEnd<armnn::DataType::QAsymmU8>(defaultBackends, - ComparisonOperation::Greater, - expectedOutput); -} - -BOOST_AUTO_TEST_CASE(ClGreaterBroadcastEndToEndTest) -{ - const std::vector<uint8_t> expectedOutput({ 0, 1, 0, 0, 0, 1, - 1, 1, 1, 1, 1, 1 }); - - ComparisonBroadcastEndToEnd<armnn::DataType::Float32>(defaultBackends, - ComparisonOperation::Greater, - expectedOutput); -} - -BOOST_AUTO_TEST_CASE(ClGreaterBroadcastEndToEndUint8Test) -{ - const std::vector<uint8_t> expectedOutput({ 0, 1, 0, 0, 0, 1, - 1, 1, 1, 1, 1, 1 }); - - ComparisonBroadcastEndToEnd<armnn::DataType::QAsymmU8>(defaultBackends, - ComparisonOperation::Greater, - expectedOutput); -} - -// InstanceNormalization -BOOST_AUTO_TEST_CASE(ClInstanceNormalizationNhwcEndToEndTest1) -{ - InstanceNormalizationNhwcEndToEndTest1(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClInstanceNormalizationNchwEndToEndTest1) -{ - InstanceNormalizationNchwEndToEndTest1(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClInstanceNormalizationNhwcEndToEndTest2) -{ - InstanceNormalizationNhwcEndToEndTest2(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClInstanceNormalizationNchwEndToEndTest2) -{ - InstanceNormalizationNchwEndToEndTest2(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClPreluEndToEndFloat32Test) -{ - PreluEndToEndNegativeTest<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClPreluEndToEndTestUint8) -{ - PreluEndToEndPositiveTest<armnn::DataType::QAsymmU8>(defaultBackends); -} - 
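-// A minimal sketch, not from the original file, of the pattern the EndToEnd
-// helper templates used throughout this suite implement; the Abs network and
-// the tensor size below are illustrative assumptions only.
-static void ClEndToEndPatternSketch()
-{
-    using namespace armnn;
-
-    // Build a one-operator network: input -> Abs activation -> output.
-    INetworkPtr net = INetwork::Create();
-    IConnectableLayer* input      = net->AddInputLayer(0);
-    ActivationDescriptor descriptor;
-    descriptor.m_Function = ActivationFunction::Abs;
-    IConnectableLayer* activation = net->AddActivationLayer(descriptor);
-    IConnectableLayer* output     = net->AddOutputLayer(0);
-
-    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
-    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));
-
-    TensorInfo info({ 1, 16 }, DataType::Float32);
-    input->GetOutputSlot(0).SetTensorInfo(info);
-    activation->GetOutputSlot(0).SetTensorInfo(info);
-
-    // Optimize the network for GpuAcc and load it into a runtime; the helpers
-    // then run inference with EnqueueWorkload and compare the results against
-    // a reference computed on the host.
-    IRuntimePtr runtime = IRuntime::Create(IRuntime::CreationOptions());
-    IOptimizedNetworkPtr optNet = Optimize(*net, defaultBackends, runtime->GetDeviceSpec());
-    NetworkId netId;
-    runtime->LoadNetwork(netId, std::move(optNet));
-}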
-BOOST_AUTO_TEST_CASE(ClSpaceToDepthNhwcEndToEndTest1) -{ - SpaceToDepthNhwcEndToEndTest1(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSpaceToDepthNchwEndToEndTest1) -{ - SpaceToDepthNchwEndToEndTest1(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSpaceToDepthNhwcEndToEndTest2) -{ - SpaceToDepthNhwcEndToEndTest2(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSpaceToDepthNchwEndToEndTest2) -{ - SpaceToDepthNchwEndToEndTest2(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSplitter1dEndToEndTest) -{ - Splitter1dEndToEnd<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSplitter1dEndToEndUint8Test) -{ - Splitter1dEndToEnd<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSplitter2dDim0EndToEndTest) -{ - Splitter2dDim0EndToEnd<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSplitter2dDim1EndToEndTest) -{ - Splitter2dDim1EndToEnd<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSplitter2dDim0EndToEndUint8Test) -{ - Splitter2dDim0EndToEnd<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSplitter2dDim1EndToEndUint8Test) -{ - Splitter2dDim1EndToEnd<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSplitter3dDim0EndToEndTest) -{ - Splitter3dDim0EndToEnd<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSplitter3dDim1EndToEndTest) -{ - Splitter3dDim1EndToEnd<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSplitter3dDim2EndToEndTest) -{ - Splitter3dDim2EndToEnd<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSplitter3dDim0EndToEndUint8Test) -{ - Splitter3dDim0EndToEnd<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSplitter3dDim1EndToEndUint8Test) -{ - Splitter3dDim1EndToEnd<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSplitter3dDim2EndToEndUint8Test) -{ - Splitter3dDim2EndToEnd<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSplitter4dDim0EndToEndTest) -{ - Splitter4dDim0EndToEnd<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSplitter4dDim1EndToEndTest) -{ - Splitter4dDim1EndToEnd<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSplitter4dDim2EndToEndTest) -{ - Splitter4dDim2EndToEnd<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSplitter4dDim3EndToEndTest) -{ - Splitter4dDim3EndToEnd<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSplitter4dDim0EndToEndUint8Test) -{ - Splitter4dDim0EndToEnd<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSplitter4dDim1EndToEndUint8Test) -{ - Splitter4dDim1EndToEnd<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSplitter4dDim2EndToEndUint8Test) -{ - Splitter4dDim2EndToEnd<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClSplitter4dDim3EndToEndUint8Test) -{ - Splitter4dDim3EndToEnd<armnn::DataType::QAsymmU8>(defaultBackends); -} - -// TransposeConvolution2d -BOOST_AUTO_TEST_CASE(ClTransposeConvolution2dEndToEndFloatNchwTest) -{ - TransposeConvolution2dEndToEnd<armnn::DataType::Float32, armnn::DataType::Float32>( - defaultBackends, armnn::DataLayout::NCHW); -} - -BOOST_AUTO_TEST_CASE(ClTransposeConvolution2dEndToEndUint8NchwTest) -{ - TransposeConvolution2dEndToEnd<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>( - defaultBackends, armnn::DataLayout::NCHW); -} - 
-BOOST_AUTO_TEST_CASE(ClTransposeConvolution2dEndToEndFloatNhwcTest) -{ - TransposeConvolution2dEndToEnd<armnn::DataType::Float32, armnn::DataType::Float32>( - defaultBackends, armnn::DataLayout::NHWC); -} - -BOOST_AUTO_TEST_CASE(ClTransposeConvolution2dEndToEndUint8NhwcTest) -{ - TransposeConvolution2dEndToEnd<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>( - defaultBackends, armnn::DataLayout::NHWC); -} - -BOOST_AUTO_TEST_CASE(ClQuantizedLstmEndToEndTest) -{ - QuantizedLstmEndToEnd(defaultBackends); -} - -// ArgMinMax -BOOST_AUTO_TEST_CASE(ClArgMaxSimpleTest) -{ - ArgMaxEndToEndSimple<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClArgMinSimpleTest) -{ - ArgMinEndToEndSimple<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClArgMaxAxis0Test) -{ - ArgMaxAxis0EndToEnd<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClArgMinAxis0Test) -{ - ArgMinAxis0EndToEnd<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClArgMaxAxis1Test) -{ - ArgMaxAxis1EndToEnd<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClArgMinAxis1Test) -{ - ArgMinAxis1EndToEnd<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClArgMaxAxis2Test) -{ - ArgMaxAxis2EndToEnd<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClArgMinAxis2Test) -{ - ArgMinAxis2EndToEnd<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClArgMaxAxis3Test) -{ - ArgMaxAxis3EndToEnd<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClArgMinAxis3Test) -{ - ArgMinAxis3EndToEnd<armnn::DataType::Float32>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClArgMaxSimpleTestQAsymmU8) -{ - ArgMaxEndToEndSimple<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClArgMinSimpleTestQAsymmU8) -{ - ArgMinEndToEndSimple<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClArgMaxAxis0TestQAsymmU8) -{ - ArgMaxAxis0EndToEnd<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClArgMinAxis0TestQAsymmU8) -{ - ArgMinAxis0EndToEnd<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClArgMaxAxis1TestQAsymmU8) -{ - ArgMaxAxis1EndToEnd<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClArgMinAxis1TestQAsymmU8) -{ - ArgMinAxis1EndToEnd<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClArgMaxAxis2TestQAsymmU8) -{ - ArgMaxAxis2EndToEnd<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClArgMinAxis2TestQAsymmU8) -{ - ArgMinAxis2EndToEnd<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClArgMaxAxis3TestQAsymmU8) -{ - ArgMaxAxis3EndToEnd<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_CASE(ClArgMinAxis3TestQAsymmU8) -{ - ArgMinAxis3EndToEnd<armnn::DataType::QAsymmU8>(defaultBackends); -} - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/cl/test/ClJsonPrinterTests.cpp b/src/backends/cl/test/ClJsonPrinterTests.cpp deleted file mode 100644 index d188a8e9f4..0000000000 --- a/src/backends/cl/test/ClJsonPrinterTests.cpp +++ /dev/null @@ -1,23 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include <armnn/BackendId.hpp> - -#include <cl/test/ClContextControlFixture.hpp> -#include <backendsCommon/test/JsonPrinterTestImpl.hpp> - -#include <boost/test/unit_test.hpp> - -#include <vector> - -BOOST_FIXTURE_TEST_SUITE(ClJsonPrinter, ClProfilingContextControlFixture) - -BOOST_AUTO_TEST_CASE(SoftmaxProfilerJsonPrinterGpuAccTest) -{ - std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc}; - RunSoftmaxProfilerJsonPrinterTest(backends); -} - -BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file diff --git a/src/backends/cl/test/ClLayerSupportTests.cpp b/src/backends/cl/test/ClLayerSupportTests.cpp deleted file mode 100644 index 33a2912b79..0000000000 --- a/src/backends/cl/test/ClLayerSupportTests.cpp +++ /dev/null @@ -1,134 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClWorkloadFactoryHelper.hpp" - -#include <layers/ConvertFp16ToFp32Layer.hpp> -#include <layers/ConvertFp32ToFp16Layer.hpp> -#include <layers/MeanLayer.hpp> -#include <test/TensorHelpers.hpp> - -#include <backendsCommon/CpuTensorHandle.hpp> -#include <cl/ClWorkloadFactory.hpp> -#include <cl/test/ClContextControlFixture.hpp> -#include <backendsCommon/test/IsLayerSupportedTestImpl.hpp> -#include <backendsCommon/test/LayerTests.hpp> - -#include <boost/test/unit_test.hpp> - -#include <string> - -BOOST_AUTO_TEST_SUITE(ClLayerSupport) - -BOOST_FIXTURE_TEST_CASE(IsLayerSupportedFloat16Cl, ClContextControlFixture) -{ - armnn::ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - IsLayerSupportedTests<armnn::ClWorkloadFactory, armnn::DataType::Float16>(&factory); -} - -BOOST_FIXTURE_TEST_CASE(IsLayerSupportedFloat32Cl, ClContextControlFixture) -{ - armnn::ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - IsLayerSupportedTests<armnn::ClWorkloadFactory, armnn::DataType::Float32>(&factory); -} - -BOOST_FIXTURE_TEST_CASE(IsLayerSupportedQAsymmU8Cl, ClContextControlFixture) -{ - armnn::ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - IsLayerSupportedTests<armnn::ClWorkloadFactory, armnn::DataType::QAsymmU8>(&factory); -} - -BOOST_FIXTURE_TEST_CASE(IsLayerSupportedQAsymmS8Cl, ClContextControlFixture) -{ - armnn::ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - IsLayerSupportedTests<armnn::ClWorkloadFactory, armnn::DataType::QAsymmS8>(&factory); -} - -BOOST_FIXTURE_TEST_CASE(IsLayerSupportedQSymmS8Cl, ClContextControlFixture) -{ - armnn::ClWorkloadFactory factory = - ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - IsLayerSupportedTests<armnn::ClWorkloadFactory, armnn::DataType::QSymmS8>(&factory); -} - -BOOST_FIXTURE_TEST_CASE(IsConvertFp16ToFp32SupportedCl, ClContextControlFixture) -{ - std::string reasonIfUnsupported; - - bool result = IsConvertLayerSupportedTests<armnn::ClWorkloadFactory, armnn::ConvertFp16ToFp32Layer, - armnn::DataType::Float16, armnn::DataType::Float32>(reasonIfUnsupported); - - BOOST_CHECK(result); -} - -BOOST_FIXTURE_TEST_CASE(IsConvertFp16ToFp32SupportedFp32InputCl, ClContextControlFixture) -{ - std::string reasonIfUnsupported; - - bool result = IsConvertLayerSupportedTests<armnn::ClWorkloadFactory, armnn::ConvertFp16ToFp32Layer, - armnn::DataType::Float32, armnn::DataType::Float32>(reasonIfUnsupported); - - BOOST_CHECK(!result); - BOOST_CHECK_EQUAL(reasonIfUnsupported, "Input should be Float16"); -} - -BOOST_FIXTURE_TEST_CASE(IsConvertFp16ToFp32SupportedFp16OutputCl, ClContextControlFixture) -{ - std::string reasonIfUnsupported; - - bool result = IsConvertLayerSupportedTests<armnn::ClWorkloadFactory, armnn::ConvertFp16ToFp32Layer, - armnn::DataType::Float16, armnn::DataType::Float16>(reasonIfUnsupported); - - BOOST_CHECK(!result); - BOOST_CHECK_EQUAL(reasonIfUnsupported, "Output should be Float32"); -} - 
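-// A minimal sketch, not from the original file, of the support query the
-// fixtures above exercise underneath; it assumes cl/ClLayerSupport.hpp is
-// included, and the tensor shape is an arbitrary choice.
-static bool ClConvertFp16ToFp32SupportSketch(std::string& reasonIfUnsupported)
-{
-    armnn::TensorInfo input({ 1, 2, 2, 2 }, armnn::DataType::Float16);
-    armnn::TensorInfo output({ 1, 2, 2, 2 }, armnn::DataType::Float32);
-
-    // ClLayerSupport validates the layer against the Compute Library and
-    // writes the failure reason, if any, into the optional string.
-    armnn::ClLayerSupport layerSupport;
-    return layerSupport.IsConvertFp16ToFp32Supported(
-        input, output, armnn::Optional<std::string&>(reasonIfUnsupported));
-}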
-BOOST_FIXTURE_TEST_CASE(IsConvertFp32ToFp16SupportedCl, ClContextControlFixture) -{ - std::string reasonIfUnsupported; - - bool result = IsConvertLayerSupportedTests<armnn::ClWorkloadFactory, armnn::ConvertFp32ToFp16Layer, - armnn::DataType::Float32, armnn::DataType::Float16>(reasonIfUnsupported); - - BOOST_CHECK(result); -} - -BOOST_FIXTURE_TEST_CASE(IsConvertFp32ToFp16SupportedFp16InputCl, ClContextControlFixture) -{ - std::string reasonIfUnsupported; - - bool result = IsConvertLayerSupportedTests<armnn::ClWorkloadFactory, armnn::ConvertFp32ToFp16Layer, - armnn::DataType::Float16, armnn::DataType::Float16>(reasonIfUnsupported); - - BOOST_CHECK(!result); - BOOST_CHECK_EQUAL(reasonIfUnsupported, "Input should be Float32"); -} - -BOOST_FIXTURE_TEST_CASE(IsConvertFp32ToFp16SupportedFp32OutputCl, ClContextControlFixture) -{ - std::string reasonIfUnsupported; - - bool result = IsConvertLayerSupportedTests<armnn::ClWorkloadFactory, armnn::ConvertFp32ToFp16Layer, - armnn::DataType::Float32, armnn::DataType::Float32>(reasonIfUnsupported); - - BOOST_CHECK(!result); - BOOST_CHECK_EQUAL(reasonIfUnsupported, "Output should be Float16"); -} - -BOOST_FIXTURE_TEST_CASE(IsMeanSupportedCl, ClContextControlFixture) -{ - std::string reasonIfUnsupported; - - bool result = IsMeanLayerSupportedTests<armnn::ClWorkloadFactory, - armnn::DataType::Float32, armnn::DataType::Float32>(reasonIfUnsupported); - - BOOST_CHECK(result); -} - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp deleted file mode 100644 index 294c77bf7f..0000000000 --- a/src/backends/cl/test/ClLayerTests.cpp +++ /dev/null @@ -1,1001 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClContextControlFixture.hpp" -#include "ClWorkloadFactoryHelper.hpp" - -#include "test/TensorHelpers.hpp" -#include "test/UnitTests.hpp" - -#include <cl/ClLayerSupport.hpp> -#include <cl/ClWorkloadFactory.hpp> -#include <cl/workloads/ClWorkloadUtils.hpp> - -#include <backendsCommon/test/ActivationFixture.hpp> -#include <backendsCommon/test/LayerTests.hpp> - -#include <arm_compute/core/CL/CLKernelLibrary.h> -#include <arm_compute/runtime/CL/CLScheduler.h> - -#include <boost/test/unit_test.hpp> - -#include <iostream> -#include <string> - -BOOST_FIXTURE_TEST_SUITE(Compute_ArmComputeCl, ClContextControlFixture) - -using namespace armnn; - -using FactoryType = ClWorkloadFactory; - -// ============================================================================ -// UNIT tests - -// Activation -ARMNN_AUTO_TEST_CASE(ConstantLinearActivation, ConstantLinearActivationTest) - -ARMNN_AUTO_TEST_CASE(ReLu1Uint8, BoundedReLuUint8UpperAndLowerBoundTest) -ARMNN_AUTO_TEST_CASE(ReLu6Uint8, BoundedReLuUint8UpperBoundOnlyTest) - -ARMNN_AUTO_TEST_CASE(Logistic, SimpleSigmoidTest) -ARMNN_AUTO_TEST_CASE(LogisticUint8, SimpleSigmoidUint8Test) - -// Sqrt Activation -ARMNN_AUTO_TEST_CASE(Sqrt, SqrtTest) -ARMNN_AUTO_TEST_CASE(SqrtNN, SqrtNNTest) - -// Batch To Space -ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcFloat321, BatchToSpaceNdNhwcTest1<DataType::Float32>) -ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcFloat322, BatchToSpaceNdNhwcTest2<DataType::Float32>) -ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcFloat323, BatchToSpaceNdNhwcTest3<DataType::Float32>) - -ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwFloat321, BatchToSpaceNdNchwTest1<DataType::Float32>) -ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwFloat322, BatchToSpaceNdNchwTest2<DataType::Float32>) 
-ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwFloat323, BatchToSpaceNdNchwTest3<DataType::Float32>) - -ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcUint1, BatchToSpaceNdNhwcTest1<DataType::QAsymmU8>) -ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcUint2, BatchToSpaceNdNhwcTest2<DataType::QAsymmU8>) -ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcUint3, BatchToSpaceNdNhwcTest3<DataType::QAsymmU8>) - -ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwUint1, BatchToSpaceNdNchwTest1<DataType::QAsymmU8>) -ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwUint2, BatchToSpaceNdNchwTest2<DataType::QAsymmU8>) -ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwUint3, BatchToSpaceNdNchwTest3<DataType::QAsymmU8>) - -// Fully Connected -ARMNN_AUTO_TEST_CASE(SimpleFullyConnected, FullyConnectedFloat32Test, false, false) -ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithBias, FullyConnectedFloat32Test, true, false) -ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithTranspose, FullyConnectedFloat32Test, false, true) -ARMNN_AUTO_TEST_CASE(FullyConnectedUint8, FullyConnectedTest<DataType::QAsymmU8>, false) -ARMNN_AUTO_TEST_CASE(FullyConnectedBiasedUint8, FullyConnectedTest<DataType::QAsymmU8>, true) - -ARMNN_AUTO_TEST_CASE(FullyConnectedLarge, FullyConnectedLargeTest, false) -ARMNN_AUTO_TEST_CASE(FullyConnectedLargeTransposed, FullyConnectedLargeTest, true) - -// Convolution -ARMNN_AUTO_TEST_CASE(SimpleConvolution1d, Convolution1dTest, true) - -ARMNN_AUTO_TEST_CASE(SimpleConvolution2d, SimpleConvolution2d3x5Test, true, DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(SimpleConvolution2dNhwc, SimpleConvolution2d3x5Test, true, DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x3Uint8, SimpleConvolution2d3x3Uint8Test, true, DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x3Uint8Nhwc, SimpleConvolution2d3x3Uint8Test, true, DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2d, SimpleConvolution2d3x5Test, false, DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2dNhwc, SimpleConvolution2d3x5Test, false, DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2dStride2x2Nhwc, - SimpleConvolution2d3x3Stride2x2Test, false, DataLayout::NHWC) - -ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2dSquare, SimpleConvolution2d3x3Test, false, DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(SimpleConvolution2dAsymmetricPadding, Convolution2dAsymmetricPaddingTest, DataLayout::NCHW) - -ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2dSquareNhwc, SimpleConvolution2d3x3Test, false, DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(SimpleConvolution2dAsymmetricPaddingNhwc, - Convolution2dAsymmetricPaddingTest, - DataLayout::NHWC) - -ARMNN_AUTO_TEST_CASE(SimpleConvolution2dSquareNhwc, SimpleConvolution2d3x3NhwcTest, false) - -ARMNN_AUTO_TEST_CASE(Convolution2d3x3Dilation3x3, - Convolution2d3x3Dilation3x3Test<DataType::Float32, DataType::Float32>, - false, - DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(Convolution2d3x3Dilation3x3Nhwc, - Convolution2d3x3Dilation3x3Test<DataType::Float32, DataType::Float32>, - false, - DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(Convolution2d3x3Dilation3x3Uint8, - Convolution2d3x3Dilation3x3Test<DataType::QAsymmU8, DataType::Signed32>, - false, - DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(Convolution2d3x3Dilation3x3NhwcUint8, - Convolution2d3x3Dilation3x3Test<DataType::QAsymmU8, DataType::Signed32>, - false, - DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(Convolution2d2x3x3Dilation3x3, - Convolution2d2x3x3Dilation3x3Test<DataType::Float32, DataType::Float32>, - false, - DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(Convolution2d2x3x3Dilation3x3Nhwc, - 
Convolution2d2x3x3Dilation3x3Test<DataType::Float32, DataType::Float32>, - false, - DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(Convolution2d2x3x3Dilation3x3Uint8, - Convolution2d2x3x3Dilation3x3Test<DataType::QAsymmU8, DataType::Signed32>, - false, - DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(Convolution2d2x3x3Dilation3x3NhwcUint8, - Convolution2d2x3x3Dilation3x3Test<DataType::QAsymmU8, DataType::Signed32>, - false, - DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(Convolution2d2x2Dilation2x2Padding2x2Stride3x3, - Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<DataType::Float32, DataType::Float32>, - false, - DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(Convolution2d2x2Dilation2x2Padding2x2Stride3x3Nhwc, - Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<DataType::Float32, DataType::Float32>, - false, - DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(Convolution2d2x2Dilation2x2Padding2x2Stride3x3Uint8, - Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<DataType::QAsymmU8, DataType::Signed32>, - false, - DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(Convolution2d2x2Dilation2x2Padding2x2Stride3x3NhwcUint8, - Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<DataType::QAsymmU8, DataType::Signed32>, - false, - DataLayout::NHWC) - -ARMNN_AUTO_TEST_CASE(Convolution2dPerAxisQuantTestNchw, Convolution2dPerAxisQuantTest, DataLayout::NCHW); -ARMNN_AUTO_TEST_CASE(Convolution2dPerAxisQuantTestNhwc, Convolution2dPerAxisQuantTest, DataLayout::NHWC); - -// Depthwise Convolution -ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1, - DepthwiseConvolution2dDepthMul1Test, true, DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, - DepthwiseConvolution2dDepthMul1Test, false, DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, - DepthwiseConvolution2dDepthMul1Uint8Test, true, DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, - DepthwiseConvolution2dDepthMul1Uint8Test, false, DataLayout::NCHW) - -// NHWC Depthwise Convolution -ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Nhwc, - DepthwiseConvolution2dDepthMul1Test, true, DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Nhwc, - DepthwiseConvolution2dDepthMul1Test, false, DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8Nhwc, - DepthwiseConvolution2dDepthMul1Uint8Test, true, DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8Nhwc, - DepthwiseConvolution2dDepthMul1Uint8Test, false, DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(SimpleDepthwiseConvolution2d3x3Dilation3x3Nhwc, - SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest) - - -ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthNhwc, DepthwiseConvolution2dDepthNhwcTest, false) - -ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dAsymmetric, - DepthwiseConvolution2dAsymmetricTest, true, DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dAsymmetric, - DepthwiseConvolution2dAsymmetricTest, false, DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dAsymmetricNhwc, - DepthwiseConvolution2dAsymmetricTest, true, DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dAsymmetricNhwc, - DepthwiseConvolution2dAsymmetricTest, false, DataLayout::NHWC) - -ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul64, DepthwiseConvolution2dDepthMul64Test); - -ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dPerAxisQuantTestNchw, DepthwiseConvolution2dPerAxisQuantTest, - DataLayout::NCHW); 
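[Editor's note, not part of the deleted sources: the Dilation3x3 and Dilation2x2 cases above exercise dilated convolution. As general background (standard convolution geometry, not something stated in the deleted file), dilation d spreads a kernel of size k over a wider input window:]

    // Effective extent of a dilated convolution kernel along one axis.
    int DilatedKernelExtent(int kernelSize, int dilation)
    {
        // e.g. a 3x3 kernel with dilation 3 samples a 7x7 input window
        return kernelSize + (kernelSize - 1) * (dilation - 1);
    }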
-ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dPerAxisQuantTestNhwc, DepthwiseConvolution2dPerAxisQuantTest, - DataLayout::NHWC); - -// Splitter -ARMNN_AUTO_TEST_CASE(SimpleSplitterFloat32, SplitterFloat32Test) -ARMNN_AUTO_TEST_CASE(SimpleSplitterUint8, SplitterUint8Test) - -ARMNN_AUTO_TEST_CASE(CopyViaSplitterFloat32, CopyViaSplitterFloat32Test) -ARMNN_AUTO_TEST_CASE(CopyViaSplitterUint8, CopyViaSplitterUint8Test) - -// Concat -ARMNN_AUTO_TEST_CASE(SimpleConcat, ConcatTest) -ARMNN_AUTO_TEST_CASE(ConcatUint8, ConcatUint8Test) -ARMNN_AUTO_TEST_CASE(ConcatUint8DifferentInputOutputQParam, - ConcatDifferentInputOutputQParamTest<DataType::QAsymmU8>, false) - -// Normalization -ARMNN_AUTO_TEST_CASE(SimpleNormalizationAcross, SimpleNormalizationAcrossTest) -ARMNN_AUTO_TEST_CASE(SimpleNormalizationWithin, SimpleNormalizationWithinTest) -ARMNN_AUTO_TEST_CASE(SimpleNormalizationAcrossNhwc, SimpleNormalizationAcrossNhwcTest) - -// Pooling -ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4, SimpleMaxPooling2dSize3x3Stride2x4Test, true) -ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4Uint8, SimpleMaxPooling2dSize3x3Stride2x4Uint8Test, true) - -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2d, IgnorePaddingSimpleMaxPooling2dTest) -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2dUint8, IgnorePaddingSimpleMaxPooling2dUint8Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3, IgnorePaddingMaxPooling2dSize3Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3Uint8, IgnorePaddingMaxPooling2dSize3Uint8Test) - -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2d, IgnorePaddingSimpleAveragePooling2dTest) -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dUint8, IgnorePaddingSimpleAveragePooling2dUint8Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPadding, IgnorePaddingSimpleAveragePooling2dNoPaddingTest) -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPaddingUint8, - IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3, IgnorePaddingAveragePooling2dSize3Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3Uint8, IgnorePaddingAveragePooling2dSize3Uint8Test) - -ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleL2Pooling2d, IgnorePaddingSimpleL2Pooling2dTest) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingSimpleL2Pooling2dUint8, IgnorePaddingSimpleL2Pooling2dUint8Test) -ARMNN_AUTO_TEST_CASE(IgnorePaddingL2Pooling2dSize3, IgnorePaddingL2Pooling2dSize3Test) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingL2Pooling2dSize3Uint8, IgnorePaddingL2Pooling2dSize3Uint8Test) - -ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2d, SimpleMaxPooling2dTest, DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dNhwc, SimpleMaxPooling2dTest, DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dUint8, SimpleMaxPooling2dUint8Test, DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dUint8Nhwc, SimpleMaxPooling2dUint8Test, DataLayout::NHWC) - -ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest, DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dNhwc, SimpleAveragePooling2dTest, DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test, DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8Nhwc, SimpleAveragePooling2dUint8Test, DataLayout::NHWC) - -ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2, - IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, - false) 
-ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2NoPadding, - IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, - true) -ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest) -ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test) - -ARMNN_AUTO_TEST_CASE(SimpleL2Pooling2d, SimpleL2Pooling2dTest, DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(SimpleL2Pooling2dNhwc, SimpleL2Pooling2dTest, DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_SimpleL2Pooling2dUint8, SimpleL2Pooling2dUint8Test, DataLayout::NCHW) - -ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride1, L2Pooling2dSize3Stride1Test) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride1Uint8, L2Pooling2dSize3Stride1Uint8Test) -ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride3, L2Pooling2dSize3Stride3Test) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride3Uint8, L2Pooling2dSize3Stride3Uint8Test) -ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride4, L2Pooling2dSize3Stride4Test) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride4Uint8, L2Pooling2dSize3Stride4Uint8Test) -ARMNN_AUTO_TEST_CASE(L2Pooling2dSize7, L2Pooling2dSize7Test) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize7Uint8, L2Pooling2dSize7Uint8Test) -ARMNN_AUTO_TEST_CASE(L2Pooling2dSize9, L2Pooling2dSize9Test) -ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize9Uint8, L2Pooling2dSize9Uint8Test) - -// Add -ARMNN_AUTO_TEST_CASE(SimpleAdd, AdditionTest) -ARMNN_AUTO_TEST_CASE(Add5d, Addition5dTest) -ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest) -ARMNN_AUTO_TEST_CASE(AddBroadcast, AdditionBroadcastTest) - -ARMNN_AUTO_TEST_CASE(AdditionUint8, AdditionUint8Test) -ARMNN_AUTO_TEST_CASE(AddBroadcastUint8, AdditionBroadcastUint8Test) -ARMNN_AUTO_TEST_CASE(AddBroadcast1ElementUint8, AdditionBroadcast1ElementUint8Test) - -// Sub -ARMNN_AUTO_TEST_CASE(SimpleSub, SubtractionTest) -ARMNN_AUTO_TEST_CASE(SubBroadcast1Element, SubtractionBroadcast1ElementTest) -ARMNN_AUTO_TEST_CASE(SubBroadcast, SubtractionBroadcastTest) - -ARMNN_AUTO_TEST_CASE(SubtractionUint8, SubtractionUint8Test) -ARMNN_AUTO_TEST_CASE(SubBroadcastUint8, SubtractionBroadcastUint8Test) -ARMNN_AUTO_TEST_CASE(SubBroadcast1ElementUint8, SubtractionBroadcast1ElementUint8Test) - -// Div -ARMNN_AUTO_TEST_CASE(SimpleDivision, DivisionTest) -ARMNN_AUTO_TEST_CASE(DivisionByZero, DivisionByZeroTest) -ARMNN_AUTO_TEST_CASE(DivisionBroadcast1Element, DivisionBroadcast1ElementTest) -ARMNN_AUTO_TEST_CASE(DivisionBroadcast1DVector, DivisionBroadcast1DVectorTest) -// NOTE: quantized division is not supported by CL and not required by the -// android NN api - -// Mul -ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest) -ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1Element, MultiplicationBroadcast1ElementTest) -ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVector, MultiplicationBroadcast1DVectorTest) -ARMNN_AUTO_TEST_CASE(MultiplicationUint8, MultiplicationUint8Test) -ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1ElementUint8, MultiplicationBroadcast1ElementUint8Test) -ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVectorUint8, MultiplicationBroadcast1DVectorUint8Test) -ARMNN_AUTO_TEST_CASE(Multiplication5d, Multiplication5dTest) - -// Batch Norm -ARMNN_AUTO_TEST_CASE(BatchNormFloat32, BatchNormFloat32Test) -ARMNN_AUTO_TEST_CASE(BatchNormFloat32Nhwc, BatchNormFloat32NhwcTest) - -// InstanceNormalization -ARMNN_AUTO_TEST_CASE(InstanceNormFloat32Nchw, InstanceNormFloat32Test, DataLayout::NCHW); 
-ARMNN_AUTO_TEST_CASE(InstanceNormFloat16Nchw, InstanceNormFloat16Test, DataLayout::NCHW); - -ARMNN_AUTO_TEST_CASE(InstanceNormFloat32Nhwc, InstanceNormFloat32Test, DataLayout::NHWC); -ARMNN_AUTO_TEST_CASE(InstanceNormFloat16Nhwc, InstanceNormFloat16Test, DataLayout::NHWC); - -ARMNN_AUTO_TEST_CASE(InstanceNormFloat32Nchw2, InstanceNormFloat32Test2, DataLayout::NCHW); -ARMNN_AUTO_TEST_CASE(InstanceNormFloat16Nchw2, InstanceNormFloat16Test2, DataLayout::NCHW); - -ARMNN_AUTO_TEST_CASE(InstanceNormFloat32Nhwc2, InstanceNormFloat32Test2, DataLayout::NHWC); -ARMNN_AUTO_TEST_CASE(InstanceNormFloat16Nhwc2, InstanceNormFloat16Test2, DataLayout::NHWC); - -// L2 Normalization -ARMNN_AUTO_TEST_CASE(L2Normalization1d, L2Normalization1dTest, DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(L2Normalization2d, L2Normalization2dTest, DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(L2Normalization3d, L2Normalization3dTest, DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(L2Normalization4d, L2Normalization4dTest, DataLayout::NCHW) - -ARMNN_AUTO_TEST_CASE(L2Normalization1dNhwc, L2Normalization1dTest, DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(L2Normalization2dNhwc, L2Normalization2dTest, DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(L2Normalization3dNhwc, L2Normalization3dTest, DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(L2Normalization4dNhwc, L2Normalization4dTest, DataLayout::NHWC) - -ARMNN_AUTO_TEST_CASE(L2Normalization2dShape, L2Normalization2dShapeTest); - -ARMNN_AUTO_TEST_CASE(L2NormalizationDefaultEpsilon, L2NormalizationDefaultEpsilonTest, DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(L2NormalizationNonDefaultEpsilon, L2NormalizationNonDefaultEpsilonTest, DataLayout::NCHW) - -// Constant -ARMNN_AUTO_TEST_CASE(Constant, ConstantTest) -ARMNN_AUTO_TEST_CASE(ConstantUint8, ConstantUint8SimpleQuantizationScaleNoOffsetTest) - -// Concat -ARMNN_AUTO_TEST_CASE(Concat1d, Concat1dTest) -ARMNN_AUTO_TEST_CASE(Concat1dUint8, Concat1dUint8Test) - -ARMNN_AUTO_TEST_CASE(Concat2dDim0, Concat2dDim0Test) -ARMNN_AUTO_TEST_CASE(Concat2dDim0Uint8, Concat2dDim0Uint8Test) -ARMNN_AUTO_TEST_CASE(Concat2dDim1, Concat2dDim1Test) -ARMNN_AUTO_TEST_CASE(Concat2dDim1Uint8, Concat2dDim1Uint8Test) - -ARMNN_AUTO_TEST_CASE(Concat2dDim0DiffInputDims, Concat2dDim0DiffInputDimsTest) -ARMNN_AUTO_TEST_CASE(Concat2dDim0DiffInputDimsUint8, Concat2dDim0DiffInputDimsUint8Test) -ARMNN_AUTO_TEST_CASE(Concat2dDim1DiffInputDims, Concat2dDim1DiffInputDimsTest) -ARMNN_AUTO_TEST_CASE(Concat2dDim1DiffInputDimsUint8, Concat2dDim1DiffInputDimsUint8Test) - -ARMNN_AUTO_TEST_CASE(Concat3dDim0, Concat3dDim0Test) -ARMNN_AUTO_TEST_CASE(Concat3dDim0Uint8, Concat3dDim0Uint8Test) -ARMNN_AUTO_TEST_CASE(Concat3dDim1, Concat3dDim1Test) -ARMNN_AUTO_TEST_CASE(Concat3dDim1Uint8, Concat3dDim1Uint8Test) -ARMNN_AUTO_TEST_CASE(Concat3dDim2, Concat3dDim2Test, false) -ARMNN_AUTO_TEST_CASE(Concat3dDim2Uint8, Concat3dDim2Uint8Test, false) - -ARMNN_AUTO_TEST_CASE(Concat3dDim0DiffInputDims, Concat3dDim0DiffInputDimsTest) -ARMNN_AUTO_TEST_CASE(Concat3dDim0DiffInputDimsUint8, Concat3dDim0DiffInputDimsUint8Test) -ARMNN_AUTO_TEST_CASE(Concat3dDim1DiffInputDims, Concat3dDim1DiffInputDimsTest) -ARMNN_AUTO_TEST_CASE(Concat3dDim1DiffInputDimsUint8, Concat3dDim1DiffInputDimsUint8Test) -ARMNN_AUTO_TEST_CASE(Concat3dDim2DiffInputDims, Concat3dDim2DiffInputDimsTest, false) -ARMNN_AUTO_TEST_CASE(Concat3dDim2DiffInputDimsUint8, Concat3dDim2DiffInputDimsUint8Test, false) - -ARMNN_AUTO_TEST_CASE(Concat4dDim0, Concat4dDim0Test) -ARMNN_AUTO_TEST_CASE(Concat4dDim1, Concat4dDim1Test) -ARMNN_AUTO_TEST_CASE(Concat4dDim3, Concat4dDim3Test, false) 
-ARMNN_AUTO_TEST_CASE(Concat4dDim0Uint8, Concat4dDim0Uint8Test) -ARMNN_AUTO_TEST_CASE(Concat4dDim1Uint8, Concat4dDim1Uint8Test) -ARMNN_AUTO_TEST_CASE(Concat4dDim3Uint8, Concat4dDim3Uint8Test, false) - -ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim0, Concat4dDiffShapeDim0Test) -ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim1, Concat4dDiffShapeDim1Test) -ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim3, Concat4dDiffShapeDim3Test, false) -ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim0Uint8, Concat4dDiffShapeDim0Uint8Test) -ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim1Uint8, Concat4dDiffShapeDim1Uint8Test) -ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim3Uint8, Concat4dDiffShapeDim3Uint8Test, false) - -// DepthToSpace -ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat32_1, DepthToSpaceTest1<DataType::Float32>, DataLayout::NCHW); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat32_2, DepthToSpaceTest2<DataType::Float32>, DataLayout::NCHW); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat32_3, DepthToSpaceTest3<DataType::Float32>, DataLayout::NCHW); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat32_4, DepthToSpaceTest4<DataType::Float32>, DataLayout::NCHW); - -ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat16_1, DepthToSpaceTest1<DataType::Float16>, DataLayout::NCHW); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat16_2, DepthToSpaceTest2<DataType::Float16>, DataLayout::NCHW); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat16_3, DepthToSpaceTest3<DataType::Float16>, DataLayout::NCHW); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat16_4, DepthToSpaceTest4<DataType::Float16>, DataLayout::NCHW); - -ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwUint8_1, DepthToSpaceTest1<DataType::QAsymmU8>, DataLayout::NCHW); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwUint8_2, DepthToSpaceTest2<DataType::QAsymmU8>, DataLayout::NCHW); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwUint8_3, DepthToSpaceTest3<DataType::QAsymmU8>, DataLayout::NCHW); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwUint8_4, DepthToSpaceTest4<DataType::QAsymmU8>, DataLayout::NCHW); - -ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwInt16_1, DepthToSpaceTest1<DataType::QSymmS16>, DataLayout::NCHW); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwInt16_2, DepthToSpaceTest2<DataType::QSymmS16>, DataLayout::NCHW); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwInt16_3, DepthToSpaceTest3<DataType::QSymmS16>, DataLayout::NCHW); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwInt16_4, DepthToSpaceTest4<DataType::QSymmS16>, DataLayout::NCHW); - -ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat32_1, DepthToSpaceTest1<DataType::Float32>, DataLayout::NHWC); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat32_2, DepthToSpaceTest2<DataType::Float32>, DataLayout::NHWC); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat32_3, DepthToSpaceTest3<DataType::Float32>, DataLayout::NHWC); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat32_4, DepthToSpaceTest4<DataType::Float32>, DataLayout::NHWC); - -ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat16_1, DepthToSpaceTest1<DataType::Float16>, DataLayout::NHWC); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat16_2, DepthToSpaceTest2<DataType::Float16>, DataLayout::NHWC); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat16_3, DepthToSpaceTest3<DataType::Float16>, DataLayout::NHWC); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat16_4, DepthToSpaceTest4<DataType::Float16>, DataLayout::NHWC); - -ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcUint8_1, DepthToSpaceTest1<DataType::QAsymmU8>, DataLayout::NHWC); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcUint8_2, DepthToSpaceTest2<DataType::QAsymmU8>, DataLayout::NHWC); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcUint8_3, DepthToSpaceTest3<DataType::QAsymmU8>, 
DataLayout::NHWC); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcUint8_4, DepthToSpaceTest4<DataType::QAsymmU8>, DataLayout::NHWC); - -ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcInt16_1, DepthToSpaceTest1<DataType::QSymmS16>, DataLayout::NHWC); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcInt16_2, DepthToSpaceTest2<DataType::QSymmS16>, DataLayout::NHWC); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcInt16_3, DepthToSpaceTest3<DataType::QSymmS16>, DataLayout::NHWC); -ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcInt16_4, DepthToSpaceTest4<DataType::QSymmS16>, DataLayout::NHWC); - -// Floor -ARMNN_AUTO_TEST_CASE(SimpleFloor, SimpleFloorTest<DataType::Float32>) - -// Reshape -ARMNN_AUTO_TEST_CASE(SimpleReshapeFloat32, SimpleReshapeTest<DataType::Float32>) -ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeTest<DataType::QAsymmU8>) -ARMNN_AUTO_TEST_CASE(Reshape5d, Reshape5dTest<DataType::Float32>) - -// Pad -ARMNN_AUTO_TEST_CASE(PadFloat322d, PadFloat322dTest) -ARMNN_AUTO_TEST_CASE(PadFloat322dCustomPadding, PadFloat322dCustomPaddingTest) -ARMNN_AUTO_TEST_CASE(PadFloat323d, PadFloat323dTest) -ARMNN_AUTO_TEST_CASE(PadFloat324d, PadFloat324dTest) - -ARMNN_AUTO_TEST_CASE(PadUint82d, PadUint82dTest) -ARMNN_AUTO_TEST_CASE(PadUint82dCustomPadding, PadUint82dCustomPaddingTest) -ARMNN_AUTO_TEST_CASE(PadUint83d, PadUint83dTest) -ARMNN_AUTO_TEST_CASE(PadUint84d, PadUint84dTest) - -ARMNN_AUTO_TEST_CASE(Pad2dQSymm16, Pad2dTestCommon<DataType::QSymmS16>, 2.0f, 0, 0.0f) -ARMNN_AUTO_TEST_CASE(Pad2dQSymm16CustomPadding, Pad2dTestCommon<DataType::QSymmS16>, 2.0f, 0, 1.0f) -ARMNN_AUTO_TEST_CASE(Pad3dQSymm16, Pad3dTestCommon<DataType::QSymmS16>, 2.0f, 0) -ARMNN_AUTO_TEST_CASE(Pad4dQSymm16, Pad4dTestCommon<DataType::QSymmS16>, 2.0f, 0) - -// PReLU -ARMNN_AUTO_TEST_CASE(PreluFloat32, PreluTest<DataType::Float32>) -ARMNN_AUTO_TEST_CASE(PreluUint8, PreluTest<DataType::QAsymmU8>) - -// Permute -ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteTest<DataType::Float32>) -ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet1Test, PermuteValueSet1Test<DataType::Float32>) -ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet2Test, PermuteValueSet2Test<DataType::Float32>) -ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet3Test, PermuteValueSet3Test<DataType::Float32>) -ARMNN_AUTO_TEST_CASE(SimplePermuteQASymm8, SimplePermuteTest<DataType::QAsymmU8>) -ARMNN_AUTO_TEST_CASE(PermuteQASymm8ValueSet1Test, PermuteValueSet1Test<DataType::QAsymmU8>) -ARMNN_AUTO_TEST_CASE(PermuteQASymm8ValueSet2Test, PermuteValueSet2Test<DataType::QAsymmU8>) -ARMNN_AUTO_TEST_CASE(PermuteQASymm8ValueSet3Test, PermuteValueSet3Test<DataType::QAsymmU8>) - -// Lstm -ARMNN_AUTO_TEST_CASE(LstmLayerFloat32WithCifgWithPeepholeNoProjection, - LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest) -ARMNN_AUTO_TEST_CASE(LstmLayerFloat32NoCifgNoPeepholeNoProjection, - LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest) -ARMNN_AUTO_TEST_CASE(LstmLayerFloat32NoCifgWithPeepholeWithProjection, - LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest) - -ARMNN_AUTO_TEST_CASE(LstmLayerFloat32NoCifgWithPeepholeWithProjectionWithLayerNorm, - LstmLayerFloat32NoCifgWithPeepholeWithProjectionWithLayerNormTest) - -ARMNN_AUTO_TEST_CASE(QuantizedLstm, QuantizedLstmTest) - -// Convert from Float16 to Float32 -ARMNN_AUTO_TEST_CASE(SimpleConvertFp16ToFp32, SimpleConvertFp16ToFp32Test) -// Convert from Float32 to Float16 -ARMNN_AUTO_TEST_CASE(SimpleConvertFp32ToFp16, SimpleConvertFp32ToFp16Test) - -ARMNN_AUTO_TEST_CASE(AdditionAfterMaxPool, AdditionAfterMaxPoolTest) - -//Max -ARMNN_AUTO_TEST_CASE(MaximumSimple, MaximumSimpleTest) 
-ARMNN_AUTO_TEST_CASE(MaximumBroadcast1Element, MaximumBroadcast1ElementTest) -ARMNN_AUTO_TEST_CASE(MaximumBroadcast1DVector, MaximumBroadcast1DVectorTest) -ARMNN_AUTO_TEST_CASE(MaximumUint8, MaximumUint8Test) -ARMNN_AUTO_TEST_CASE(MaximumBroadcast1ElementUint8, MaximumBroadcast1ElementUint8Test) -ARMNN_AUTO_TEST_CASE(MaximumBroadcast1DVectorUint8, MaximumBroadcast1DVectorUint8Test) - -// Mean -ARMNN_AUTO_TEST_CASE(MeanSimpleFloat32, MeanSimpleTest<DataType::Float32>) -ARMNN_AUTO_TEST_CASE(MeanSimpleAxisFloat32, MeanSimpleAxisTest<DataType::Float32>) -ARMNN_AUTO_TEST_CASE(MeanKeepDimsFloat32, MeanKeepDimsTest<DataType::Float32>) -ARMNN_AUTO_TEST_CASE(MeanMultipleDimsFloat32, MeanMultipleDimsTest<DataType::Float32>) -ARMNN_AUTO_TEST_CASE(MeanVts1Float32, MeanVts1Test<DataType::Float32>) -ARMNN_AUTO_TEST_CASE(MeanVts2Float32, MeanVts2Test<DataType::Float32>) -ARMNN_AUTO_TEST_CASE(MeanVts3Float32, MeanVts3Test<DataType::Float32>) - -ARMNN_AUTO_TEST_CASE(MeanSimpleQuantisedAsymm8, MeanSimpleTest<DataType::QAsymmU8>) -ARMNN_AUTO_TEST_CASE(MeanSimpleAxisQuantisedAsymm8, MeanSimpleAxisTest<DataType::QAsymmU8>) -ARMNN_AUTO_TEST_CASE(MeanKeepDimsQuantisedAsymm8, MeanKeepDimsTest<DataType::QAsymmU8>) -ARMNN_AUTO_TEST_CASE(MeanMultipleDimsQuantisedAsymm8, MeanMultipleDimsTest<DataType::QAsymmU8>) -ARMNN_AUTO_TEST_CASE(MeanVts1QuantisedAsymm8, MeanVts1Test<DataType::QAsymmU8>) -ARMNN_AUTO_TEST_CASE(MeanVts2QuantisedAsymm8, MeanVts2Test<DataType::QAsymmU8>) -ARMNN_AUTO_TEST_CASE(MeanVts3QuantisedAsymm8, MeanVts3Test<DataType::QAsymmU8>) - -// Minimum -ARMNN_AUTO_TEST_CASE(MinimumBroadcast1Element1, MinimumBroadcast1ElementTest1) -ARMNN_AUTO_TEST_CASE(MinimumBroadcast1Element2, MinimumBroadcast1ElementTest2) -ARMNN_AUTO_TEST_CASE(MinimumBroadcast1DVectorUint8, MinimumBroadcast1DVectorUint8Test) - -// Greater -ARMNN_AUTO_TEST_CASE(GreaterSimple, GreaterSimpleTest) -ARMNN_AUTO_TEST_CASE(GreaterBroadcast1Element, GreaterBroadcast1ElementTest) -ARMNN_AUTO_TEST_CASE(GreaterBroadcast1dVector, GreaterBroadcast1dVectorTest) - -ARMNN_AUTO_TEST_CASE(GreaterSimpleUint8, GreaterSimpleUint8Test) -ARMNN_AUTO_TEST_CASE(GreaterBroadcast1ElementUint8, GreaterBroadcast1ElementUint8Test) -ARMNN_AUTO_TEST_CASE(GreaterBroadcast1dVectorUint8, GreaterBroadcast1dVectorUint8Test) - -// Softmax -ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1, SimpleSoftmaxTest, 1.0f) -ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2, SimpleSoftmaxTest, 2.0f) -ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1Uint8, SimpleSoftmaxUint8Test, 1.0f) -ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2Uint8, SimpleSoftmaxUint8Test, 2.0f) - -ARMNN_AUTO_TEST_CASE(Simple3dSoftmax, Simple3dSoftmaxTest, 1.0f) -ARMNN_AUTO_TEST_CASE(Simple3dSoftmaxUint8, Simple3dSoftmaxUint8Test, 1.0f) - -ARMNN_AUTO_TEST_CASE(Simple4dSoftmax, Simple4dSoftmaxTest, 1.0f) -ARMNN_AUTO_TEST_CASE(Simple4dSoftmaxUint8, Simple4dSoftmaxUint8Test, 1.0f) - -// Space To Batch Nd -ARMNN_AUTO_TEST_CASE(SpaceToBatchNdSimpleFloat32, SpaceToBatchNdSimpleFloat32Test) -ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiChannelsFloat32, SpaceToBatchNdMultiChannelsFloat32Test) -ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockFloat32, SpaceToBatchNdMultiBlockFloat32Test) -ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingFloat32, SpaceToBatchNdPaddingFloat32Test) - -ARMNN_AUTO_TEST_CASE(SpaceToBatchNdSimpleUint8, SpaceToBatchNdSimpleUint8Test) -ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiChannelsUint8, SpaceToBatchNdMultiChannelsUint8Test) -ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockUint8, SpaceToBatchNdMultiBlockUint8Test) 
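[Editor's note, not part of the deleted sources: the Softmax cases above pass beta (1.0f or 2.0f) as the final argument. Beta scales the logits before normalisation; a reference computation, with the usual max subtraction for numerical stability, is sketched below as general background.]

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // out_i = exp(beta * x_i) / sum_j exp(beta * x_j); assumes a non-empty input.
    std::vector<float> SoftmaxWithBeta(const std::vector<float>& x, float beta)
    {
        float maxVal = *std::max_element(x.begin(), x.end()); // stabilise the exponentials
        std::vector<float> out(x.size());
        float sum = 0.0f;
        for (std::size_t i = 0; i < x.size(); ++i)
        {
            out[i] = std::exp(beta * (x[i] - maxVal));
            sum += out[i];
        }
        for (float& v : out)
        {
            v /= sum;
        }
        return out;
    }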
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingUint8, SpaceToBatchNdPaddingUint8Test)
-
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdSimpleNhwcFloat32, SpaceToBatchNdSimpleNhwcFloat32Test)
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiChannelsNhwcFloat32, SpaceToBatchNdMultiChannelsNhwcFloat32Test)
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockNhwcFloat32, SpaceToBatchNdMultiBlockNhwcFloat32Test)
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingNhwcFloat32, SpaceToBatchNdPaddingNhwcFloat32Test)
-
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdSimpleNhwcUint8, SpaceToBatchNdSimpleNhwcUint8Test)
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiChannelsNhwcUint8, SpaceToBatchNdMultiChannelsNhwcUint8Test)
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockNhwcUint8, SpaceToBatchNdMultiBlockNhwcUint8Test)
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingNhwcUint8, SpaceToBatchNdPaddingNhwcUint8Test)
-
-// Space To Depth
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNhwcAsymmQ8, SpaceToDepthNhwcAsymmQ8Test)
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNchwAsymmQ8, SpaceToDepthNchwAsymmQ8Test)
-
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNhwc1Float32, SpaceToDepthNhwcFloat32Test1)
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNchw1Float32, SpaceToDepthNchwFloat32Test1)
-
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNhwc2Float32, SpaceToDepthNhwcFloat32Test2)
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNchw2Float32, SpaceToDepthNchwFloat32Test2)
-
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNhwcQSymm16, SpaceToDepthNhwcQSymm16Test)
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNchwQSymm16, SpaceToDepthNchwQSymm16Test)
-
-// Stack
-ARMNN_AUTO_TEST_CASE(Stack0Axis, StackAxis0Float32Test)
-ARMNN_AUTO_TEST_CASE(StackOutput4DAxis1, StackOutput4DAxis1Float32Test)
-ARMNN_AUTO_TEST_CASE(StackOutput4DAxis2, StackOutput4DAxis2Float32Test)
-ARMNN_AUTO_TEST_CASE(StackOutput4DAxis3, StackOutput4DAxis3Float32Test)
-ARMNN_AUTO_TEST_CASE(StackOutput3DInputs3, StackOutput3DInputs3Float32Test)
-ARMNN_AUTO_TEST_CASE(StackOutput5D, StackOutput5DFloat32Test)
-ARMNN_AUTO_TEST_CASE(StackFloat16, StackFloat16Test)
-
-// Slice
-ARMNN_AUTO_TEST_CASE(Slice4dFloat32, Slice4dFloat32Test)
-ARMNN_AUTO_TEST_CASE(Slice3dFloat32, Slice3dFloat32Test)
-ARMNN_AUTO_TEST_CASE(Slice2dFloat32, Slice2dFloat32Test)
-ARMNN_AUTO_TEST_CASE(Slice1dFloat32, Slice1dFloat32Test)
-ARMNN_AUTO_TEST_CASE(Slice4dUint8, Slice4dUint8Test)
-ARMNN_AUTO_TEST_CASE(Slice3dUint8, Slice3dUint8Test)
-ARMNN_AUTO_TEST_CASE(Slice2dUint8, Slice2dUint8Test)
-ARMNN_AUTO_TEST_CASE(Slice1dUint8, Slice1dUint8Test)
-ARMNN_AUTO_TEST_CASE(Slice4dInt16, Slice4dInt16Test)
-ARMNN_AUTO_TEST_CASE(Slice3dInt16, Slice3dInt16Test)
-ARMNN_AUTO_TEST_CASE(Slice2dInt16, Slice2dInt16Test)
-ARMNN_AUTO_TEST_CASE(Slice1dInt16, Slice1dInt16Test)
-
-// Strided Slice
-ARMNN_AUTO_TEST_CASE(StridedSlice4dFloat32, StridedSlice4dFloat32Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice4dReverseFloat32, StridedSlice4dReverseFloat32Test)
-ARMNN_AUTO_TEST_CASE(StridedSliceSimpleStrideFloat32, StridedSliceSimpleStrideFloat32Test)
-ARMNN_AUTO_TEST_CASE(StridedSliceSimpleRangeMaskFloat32, StridedSliceSimpleRangeMaskFloat32Test)
-ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskFloat32, StridedSliceShrinkAxisMaskFloat32Test)
-ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskCTSFloat32, StridedSliceShrinkAxisMaskCTSFloat32Test)
-ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskBitPosition0Dim3Float32,
-                     StridedSliceShrinkAxisMaskBitPosition0Dim3Float32Test)
-ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskBitPosition0Float32, StridedSliceShrinkAxisMaskBitPosition0Float32Test)
-ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskBitPosition1Float32,
-                     
StridedSliceShrinkAxisMaskBitPosition1Float32Test) -ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskBitPosition2Float32, StridedSliceShrinkAxisMaskBitPosition2Float32Test) -ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskBitPosition3Float32, StridedSliceShrinkAxisMaskBitPosition3Float32Test) -ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskBitPosition0And1Float32, - StridedSliceShrinkAxisMaskBitPosition0And1Float32Test) -ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskBitPosition0And2Float32, - StridedSliceShrinkAxisMaskBitPosition0And2Float32Test) -ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskBitPosition0And3Float32, - StridedSliceShrinkAxisMaskBitPosition0And3Float32Test) -ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskBitPosition0And1And3Float32, - StridedSliceShrinkAxisMaskBitPosition0And1And3Float32Test) -ARMNN_AUTO_TEST_CASE(StridedSlice3dFloat32, StridedSlice3dFloat32Test) -ARMNN_AUTO_TEST_CASE(StridedSlice3dReverseFloat32, StridedSlice3dReverseFloat32Test) -ARMNN_AUTO_TEST_CASE(StridedSlice2dFloat32, StridedSlice2dFloat32Test) -ARMNN_AUTO_TEST_CASE(StridedSlice2dReverseFloat32, StridedSlice2dReverseFloat32Test) - -ARMNN_AUTO_TEST_CASE(StridedSlice4dUint8, StridedSlice4dUint8Test) -ARMNN_AUTO_TEST_CASE(StridedSlice4dReverseUint8, StridedSlice4dReverseUint8Test) -ARMNN_AUTO_TEST_CASE(StridedSliceSimpleStrideUint8, StridedSliceSimpleStrideUint8Test) -ARMNN_AUTO_TEST_CASE(StridedSliceSimpleRangeMaskUint8, StridedSliceSimpleRangeMaskUint8Test) -ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskUint8, StridedSliceShrinkAxisMaskUint8Test) -ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskBitPosition0Dim3Uint8, - StridedSliceShrinkAxisMaskBitPosition0Dim3Uint8Test) -ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskBitPosition0Uint8, StridedSliceShrinkAxisMaskBitPosition0Uint8Test) -ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskBitPosition1Uint8, StridedSliceShrinkAxisMaskBitPosition1Uint8Test) -ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskBitPosition2Uint8, StridedSliceShrinkAxisMaskBitPosition2Uint8Test) -ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskBitPosition3Uint8, StridedSliceShrinkAxisMaskBitPosition3Uint8Test) -ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskBitPosition0And1Uint8, - StridedSliceShrinkAxisMaskBitPosition0And1Uint8Test) -ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskBitPosition0And2Uint8, - StridedSliceShrinkAxisMaskBitPosition0And2Uint8Test) -ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskBitPosition0And3Uint8, - StridedSliceShrinkAxisMaskBitPosition0And3Uint8Test) -ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskBitPosition0And1And3Uint8, - StridedSliceShrinkAxisMaskBitPosition0And1And3Uint8Test) -ARMNN_AUTO_TEST_CASE(StridedSlice3dUint8, StridedSlice3dUint8Test) -ARMNN_AUTO_TEST_CASE(StridedSlice3dReverseUint8, StridedSlice3dReverseUint8Test) -ARMNN_AUTO_TEST_CASE(StridedSlice2dUint8, StridedSlice2dUint8Test) -ARMNN_AUTO_TEST_CASE(StridedSlice2dReverseUint8, StridedSlice2dReverseUint8Test) - -// Resize Bilinear - NCHW -ARMNN_AUTO_TEST_CASE(SimpleResizeBilinear, - SimpleResizeBilinearTest<DataType::Float32>, - DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(SimpleResizeBilinearUint8, - SimpleResizeBilinearTest<DataType::QAsymmU8>, - DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(ResizeBilinearNop, - ResizeBilinearNopTest<DataType::Float32>, - DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(ResizeBilinearNopUint8, - ResizeBilinearNopTest<DataType::QAsymmU8>, - DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(ResizeBilinearSqMin, - ResizeBilinearSqMinTest<DataType::Float32>, - DataLayout::NCHW) 
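[Editor's note, not part of the deleted sources: in the StridedSliceShrinkAxisMaskBitPosition cases above, the name encodes which bits of the shrink-axis mask are set; setting bit i collapses dimension i out of the output. As general background (standard strided-slice semantics, not stated in the deleted file):]

    // Rank of a strided-slice output after applying a shrink-axis mask.
    unsigned int ShrunkOutputRank(unsigned int inputRank, unsigned int shrinkAxisMask)
    {
        unsigned int removed = 0;
        for (unsigned int i = 0; i < inputRank; ++i)
        {
            removed += (shrinkAxisMask >> i) & 1u; // "BitPosition0And1And3" => mask 0b1011 removes 3 dims
        }
        return inputRank - removed;
    }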
-ARMNN_AUTO_TEST_CASE(ResizeBilinearSqMinUint8, - ResizeBilinearSqMinTest<DataType::QAsymmU8>, - DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(ResizeBilinearMin, - ResizeBilinearMinTest<DataType::Float32>, - DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(ResizeBilinearMinUint8, - ResizeBilinearMinTest<DataType::QAsymmU8>, - DataLayout::NCHW) - -// Resize Bilinear - NHWC -ARMNN_AUTO_TEST_CASE(ResizeBilinearNopNhwc, - ResizeBilinearNopTest<DataType::Float32>, - DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(ResizeBilinearNopUint8Nhwc, - ResizeBilinearNopTest<DataType::QAsymmU8>, - DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(SimpleResizeBilinearNhwc, - SimpleResizeBilinearTest<DataType::Float32>, - DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(SimpleResizeBilinearUint8Nhwc, - SimpleResizeBilinearTest<DataType::QAsymmU8>, - DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(ResizeBilinearSqMinNhwc, - ResizeBilinearSqMinTest<DataType::Float32>, - DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(ResizeBilinearSqMinUint8Nhwc, - ResizeBilinearSqMinTest<DataType::QAsymmU8>, - DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(ResizeBilinearMinNhwc, - ResizeBilinearMinTest<DataType::Float32>, - DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(ResizeBilinearMinUint8Nhwc, - ResizeBilinearMinTest<DataType::QAsymmU8>, - DataLayout::NHWC) - -// Resize NearestNeighbor - NCHW -ARMNN_AUTO_TEST_CASE(SimpleResizeNearestNeighbor, - SimpleResizeNearestNeighborTest<DataType::Float32>, - DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(SimpleResizeNearestNeighborUint8, - SimpleResizeNearestNeighborTest<DataType::QAsymmU8>, - DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(ResizeNearestNeighborNop, - ResizeNearestNeighborNopTest<DataType::Float32>, - DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(ResizeNearestNeighborNopUint8, - ResizeNearestNeighborNopTest<DataType::QAsymmU8>, - DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(ResizeNearestNeighborSqMin, - ResizeNearestNeighborSqMinTest<DataType::Float32>, - DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(ResizeNearestNeighborSqMinUint8, - ResizeNearestNeighborSqMinTest<DataType::QAsymmU8>, - DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(ResizeNearestNeighborMin, - ResizeNearestNeighborMinTest<DataType::Float32>, - DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(ResizeNearestNeighborMinUint8, - ResizeNearestNeighborMinTest<DataType::QAsymmU8>, - DataLayout::NCHW) -ARMNN_AUTO_TEST_CASE(ResizeNearestNeighborMag, - ResizeNearestNeighborMagTest<DataType::Float32>, - DataLayout::NCHW, 0.1f, 50, 0.1f, 50) -ARMNN_AUTO_TEST_CASE(ResizeNearestNeighborMagUint8, - ResizeNearestNeighborMagTest<DataType::QAsymmU8>, - DataLayout::NCHW, 0.1f, 50, 0.1f, 50) - -// Resize NearestNeighbor - NHWC -ARMNN_AUTO_TEST_CASE(ResizeNearestNeighborNopNhwc, - ResizeNearestNeighborNopTest<DataType::Float32>, - DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(ResizeNearestNeighborNopUint8Nhwc, - ResizeNearestNeighborNopTest<DataType::QAsymmU8>, - DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(SimpleResizeNearestNeighborNhwc, - SimpleResizeNearestNeighborTest<DataType::Float32>, - DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(SimpleResizeNearestNeighborUint8Nhwc, - SimpleResizeNearestNeighborTest<DataType::QAsymmU8>, - DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(ResizeNearestNeighborSqMinNhwc, - ResizeNearestNeighborSqMinTest<DataType::Float32>, - DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(ResizeNearestNeighborSqMinUint8Nhwc, - ResizeNearestNeighborSqMinTest<DataType::QAsymmU8>, - DataLayout::NHWC) -ARMNN_AUTO_TEST_CASE(ResizeNearestNeighborMinNhwc, - ResizeNearestNeighborMinTest<DataType::Float32>, - DataLayout::NHWC) 
-ARMNN_AUTO_TEST_CASE(ResizeNearestNeighborMinUint8Nhwc,
-                     ResizeNearestNeighborMinTest<DataType::QAsymmU8>,
-                     DataLayout::NHWC)
-ARMNN_AUTO_TEST_CASE(ResizeNearestNeighborMagNhwc,
-                     ResizeNearestNeighborMagTest<DataType::Float32>,
-                     DataLayout::NHWC, 0.1f, 50, 0.1f, 50)
-ARMNN_AUTO_TEST_CASE(ResizeNearestNeighborMagUint8Nhwc,
-                     ResizeNearestNeighborMagTest<DataType::QAsymmU8>,
-                     DataLayout::NHWC, 0.1f, 50, 0.1f, 50)
-
-// Rsqrt
-ARMNN_AUTO_TEST_CASE(Rsqrt2d, Rsqrt2dTest<DataType::Float32>)
-ARMNN_AUTO_TEST_CASE(Rsqrt3d, Rsqrt3dTest<DataType::Float32>)
-ARMNN_AUTO_TEST_CASE(RsqrtZero, RsqrtZeroTest<DataType::Float32>)
-ARMNN_AUTO_TEST_CASE(RsqrtNegative, RsqrtNegativeTest<DataType::Float32>)
-
-// Quantize
-ARMNN_AUTO_TEST_CASE(QuantizeSimpleUint8, QuantizeSimpleUint8Test)
-ARMNN_AUTO_TEST_CASE(QuantizeClampUint8, QuantizeClampUint8Test)
-
-// Dequantize
-ARMNN_AUTO_TEST_CASE(DequantizeSimpleUint8, DequantizeSimpleUint8Test)
-ARMNN_AUTO_TEST_CASE(DequantizeOffsetUint8, DequantizeOffsetUint8Test)
-ARMNN_AUTO_TEST_CASE(DequantizeSimpleInt16, DequantizeSimpleInt16Test)
-ARMNN_AUTO_TEST_CASE(DequantizeSimpleUint8ToFp16, DequantizeSimpleUint8ToFp16Test)
-ARMNN_AUTO_TEST_CASE(DequantizeSimpleInt16ToFp16, DequantizeSimpleInt16ToFp16Test)
-
-// TransposeConvolution2d
-ARMNN_AUTO_TEST_CASE(SimpleTransposeConvolution2dFloatNchw,
-                     SimpleTransposeConvolution2dTest<DataType::Float32, DataType::Float32>,
-                     true,
-                     DataLayout::NCHW)
-ARMNN_AUTO_TEST_CASE(SimpleTransposeConvolution2dFloatNhwc,
-                     SimpleTransposeConvolution2dTest<DataType::Float32, DataType::Float32>,
-                     true,
-                     DataLayout::NHWC)
-ARMNN_AUTO_TEST_CASE(SimpleTransposeConvolution2dUint8Nchw,
-                     SimpleTransposeConvolution2dTest<DataType::QAsymmU8, DataType::Signed32>,
-                     true,
-                     DataLayout::NCHW)
-ARMNN_AUTO_TEST_CASE(SimpleTransposeConvolution2dUint8Nhwc,
-                     SimpleTransposeConvolution2dTest<DataType::QAsymmU8, DataType::Signed32>,
-                     true,
-                     DataLayout::NHWC)
-
-ARMNN_AUTO_TEST_CASE(UnbiasedSimpleTransposeConvolution2dFloatNchw,
-                     SimpleTransposeConvolution2dTest<DataType::Float32, DataType::Float32>,
-                     false,
-                     DataLayout::NCHW)
-ARMNN_AUTO_TEST_CASE(UnbiasedSimpleTransposeConvolution2dFloatNhwc,
-                     SimpleTransposeConvolution2dTest<DataType::Float32, DataType::Float32>,
-                     false,
-                     DataLayout::NHWC)
-ARMNN_AUTO_TEST_CASE(UnbiasedSimpleTransposeConvolution2dUint8Nchw,
-                     SimpleTransposeConvolution2dTest<DataType::QAsymmU8, DataType::Signed32>,
-                     false,
-                     DataLayout::NCHW)
-ARMNN_AUTO_TEST_CASE(UnbiasedSimpleTransposeConvolution2dUint8Nhwc,
-                     SimpleTransposeConvolution2dTest<DataType::QAsymmU8, DataType::Signed32>,
-                     false,
-                     DataLayout::NHWC)
-
-ARMNN_AUTO_TEST_CASE(PaddedTransposeConvolution2dFloatNchw,
-                     PaddedTransposeConvolution2dTest<DataType::Float32, DataType::Float32>,
-                     true,
-                     DataLayout::NCHW)
-ARMNN_AUTO_TEST_CASE(PaddedTransposeConvolution2dFloatNhwc,
-                     PaddedTransposeConvolution2dTest<DataType::Float32, DataType::Float32>,
-                     true,
-                     DataLayout::NHWC)
-ARMNN_AUTO_TEST_CASE(PaddedTransposeConvolution2dUint8Nchw,
-                     PaddedTransposeConvolution2dTest<DataType::QAsymmU8, DataType::Signed32>,
-                     true,
-                     DataLayout::NCHW)
-ARMNN_AUTO_TEST_CASE(PaddedTransposeConvolution2dUint8Nhwc,
-                     PaddedTransposeConvolution2dTest<DataType::QAsymmU8, DataType::Signed32>,
-                     true,
-                     DataLayout::NHWC)
-
-ARMNN_AUTO_TEST_CASE(UnbiasedPaddedTransposeConvolution2dFloatNchw,
-                     PaddedTransposeConvolution2dTest<DataType::Float32, DataType::Float32>,
-                     false,
-                     DataLayout::NCHW)
-ARMNN_AUTO_TEST_CASE(UnbiasedPaddedTransposeConvolution2dFloatNhwc,
-                     
PaddedTransposeConvolution2dTest<DataType::Float32, DataType::Float32>,
-                     false,
-                     DataLayout::NHWC)
-ARMNN_AUTO_TEST_CASE(UnbiasedPaddedTransposeConvolution2dUint8Nchw,
-                     PaddedTransposeConvolution2dTest<DataType::QAsymmU8, DataType::Signed32>,
-                     false,
-                     DataLayout::NCHW)
-ARMNN_AUTO_TEST_CASE(UnbiasedPaddedTransposeConvolution2dUint8Nhwc,
-                     PaddedTransposeConvolution2dTest<DataType::QAsymmU8, DataType::Signed32>,
-                     false,
-                     DataLayout::NHWC)
-
-ARMNN_AUTO_TEST_CASE(StridedTransposeConvolution2dFloatNchw,
-                     StridedTransposeConvolution2dTest<DataType::Float32, DataType::Float32>,
-                     true,
-                     DataLayout::NCHW)
-ARMNN_AUTO_TEST_CASE(StridedTransposeConvolution2dFloatNhwc,
-                     StridedTransposeConvolution2dTest<DataType::Float32, DataType::Float32>,
-                     true,
-                     DataLayout::NHWC)
-ARMNN_AUTO_TEST_CASE(StridedTransposeConvolution2dUint8Nchw,
-                     StridedTransposeConvolution2dTest<DataType::QAsymmU8, DataType::Signed32>,
-                     true,
-                     DataLayout::NCHW)
-ARMNN_AUTO_TEST_CASE(StridedTransposeConvolution2dUint8Nhwc,
-                     StridedTransposeConvolution2dTest<DataType::QAsymmU8, DataType::Signed32>,
-                     true,
-                     DataLayout::NHWC)
-
-ARMNN_AUTO_TEST_CASE(UnbiasedStridedTransposeConvolution2dFloatNchw,
-                     StridedTransposeConvolution2dTest<DataType::Float32, DataType::Float32>,
-                     false,
-                     DataLayout::NCHW)
-ARMNN_AUTO_TEST_CASE(UnbiasedStridedTransposeConvolution2dFloatNhwc,
-                     StridedTransposeConvolution2dTest<DataType::Float32, DataType::Float32>,
-                     false,
-                     DataLayout::NHWC)
-ARMNN_AUTO_TEST_CASE(UnbiasedStridedTransposeConvolution2dUint8Nchw,
-                     StridedTransposeConvolution2dTest<DataType::QAsymmU8, DataType::Signed32>,
-                     false,
-                     DataLayout::NCHW)
-ARMNN_AUTO_TEST_CASE(UnbiasedStridedTransposeConvolution2dUint8Nhwc,
-                     StridedTransposeConvolution2dTest<DataType::QAsymmU8, DataType::Signed32>,
-                     false,
-                     DataLayout::NHWC)
-
-ARMNN_AUTO_TEST_CASE(MultiChannelTransposeConvolution2dFloatNchw,
-                     MultiChannelTransposeConvolution2dTest<DataType::Float32, DataType::Float32>,
-                     DataLayout::NCHW)
-ARMNN_AUTO_TEST_CASE(MultiChannelTransposeConvolution2dFloatNhwc,
-                     MultiChannelTransposeConvolution2dTest<DataType::Float32, DataType::Float32>,
-                     DataLayout::NHWC)
-ARMNN_AUTO_TEST_CASE(MultiChannelTransposeConvolution2dUint8Nchw,
-                     MultiChannelTransposeConvolution2dTest<DataType::QAsymmU8, DataType::Signed32>,
-                     DataLayout::NCHW)
-ARMNN_AUTO_TEST_CASE(MultiChannelTransposeConvolution2dUint8Nhwc,
-                     MultiChannelTransposeConvolution2dTest<DataType::QAsymmU8, DataType::Signed32>,
-                     DataLayout::NHWC)
-
-// Abs
-ARMNN_AUTO_TEST_CASE(Abs2d, Abs2dTest<DataType::Float32>)
-ARMNN_AUTO_TEST_CASE(Abs3d, Abs3dTest<DataType::Float32>)
-
-ARMNN_AUTO_TEST_CASE(AbsZero, AbsZeroTest<DataType::Float32>)
-
-ARMNN_AUTO_TEST_CASE(Abs2dFloat16, Abs2dTest<DataType::Float16>)
-ARMNN_AUTO_TEST_CASE(Abs3dFloat16, Abs3dTest<DataType::Float16>)
-
-ARMNN_AUTO_TEST_CASE(AbsZeroFloat16, AbsZeroTest<DataType::Float16>)
-
-// ArgMinMax
-ARMNN_AUTO_TEST_CASE(ArgMinFloat32, ArgMinSimpleTest<DataType::Float32>)
-ARMNN_AUTO_TEST_CASE(ArgMaxFloat32, ArgMaxSimpleTest<DataType::Float32>)
-ARMNN_AUTO_TEST_CASE(ArgMinChannel, ArgMinChannelTest<DataType::Float32>)
-ARMNN_AUTO_TEST_CASE(ArgMaxChannel, ArgMaxChannelTest<DataType::Float32>)
-ARMNN_AUTO_TEST_CASE(ArgMaxHeight, ArgMaxHeightTest<DataType::Float32>)
-ARMNN_AUTO_TEST_CASE(ArgMinWidth, ArgMinWidthTest<DataType::Float32>)
-
-ARMNN_AUTO_TEST_CASE(ArgMinQAsymm8, ArgMinSimpleTest<DataType::QAsymmU8>)
-ARMNN_AUTO_TEST_CASE(ArgMaxQAsymm8, ArgMaxSimpleTest<DataType::QAsymmU8>)
-ARMNN_AUTO_TEST_CASE(ArgMinChannelQAsymm8, 
ArgMinChannelTest<DataType::QAsymmU8>) -ARMNN_AUTO_TEST_CASE(ArgMaxChannelQAsymm8, ArgMaxChannelTest<DataType::QAsymmU8>) -ARMNN_AUTO_TEST_CASE(ArgMaxHeightQAsymm8, ArgMaxHeightTest<DataType::QAsymmU8>) -ARMNN_AUTO_TEST_CASE(ArgMinWidthQAsymm8, ArgMinWidthTest<DataType::QAsymmU8>) - -#if defined(ARMNNREF_ENABLED) - -// The ARMNN_COMPARE_REF_AUTO_TEST_CASE and the ARMNN_COMPARE_REF_FIXTURE_TEST_CASE test units are not available -// if the reference backend is not built - -// ============================================================================ -// COMPARE tests - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxBeta1WithReference, CompareSoftmaxTest, 1.0f) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxBeta2WithReference, CompareSoftmaxTest, 2.0f) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxUint8, CompareSoftmaxUint8Test, 1.0f) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareConv2dWithReference, CompareConvolution2dTest) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareDepthwiseConv2dWithReferenceFloat32, - CompareDepthwiseConvolution2dFloatTest, - DataLayout::NCHW) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareDepthwiseConv2dWithReferenceUint8, - CompareDepthwiseConvolution2dUint8Test, - DataLayout::NCHW) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareDepthwiseConv2dWithReferenceFloat32Nhwc, - CompareDepthwiseConvolution2dFloatTest, - DataLayout::NHWC) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareDepthwiseConv2dWithReferenceUint8Nhwc, - CompareDepthwiseConvolution2dUint8Test, - DataLayout::NHWC) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareNormalizationWithinWithReference, CompareNormalizationTest, - NormalizationAlgorithmChannel::Within, - NormalizationAlgorithmMethod::LocalBrightness) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareNormalizationAcrossWithReference, CompareNormalizationTest, - NormalizationAlgorithmChannel::Across, - NormalizationAlgorithmMethod::LocalBrightness) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMaxPooling2dWithRef, ComparePooling2dTest, PoolingAlgorithm::Max) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAveragePooling2dWithRef, ComparePooling2dTest, PoolingAlgorithm::Average) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAveragePooling2dWithRefUint8, ComparePooling2dUint8Test, - PoolingAlgorithm::Average) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareL2Pooling2dWithRef, ComparePooling2dTest, PoolingAlgorithm::L2) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAddition, CompareAdditionTest) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMultiplicationWithRef, CompareMultiplicationTest) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareBatchNorm, CompareBatchNormTest) - -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareReLu1, CompareBoundedReLuTest, 1.0f, -1.0f) -ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareReLu6, CompareBoundedReLuTest, 6.0f, 0.0f) - -// ============================================================================ -// FIXTURE tests - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSigmoidActivationWithReference, ActivationFixture, - CompareActivationTest, ActivationFunction::Sigmoid, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareTanhActivationWithReference, ActivationFixture, - CompareActivationTest, ActivationFunction::TanH, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareLinearActivationWithReference, ActivationFixture, - CompareActivationTest, ActivationFunction::Linear, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareReLuActivationWithReference, ActivationFixture, - CompareActivationTest, ActivationFunction::ReLu, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareBoundedReLuActivationWithReference, 
ActivationFixture, - CompareActivationTest, ActivationFunction::BoundedReLu, 5u) -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareBoundedReLuActivationWithReferenceUint8, ActivationFixture, - CompareActivationUint8Test, ActivationFunction::BoundedReLu) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSoftReLuActivationWithReference, ActivationFixture, - CompareActivationTest, ActivationFunction::SoftReLu, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareLeakyReLuActivationWithReference, ActivationFixture, - CompareActivationTest, ActivationFunction::LeakyReLu, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareAbsActivationWithReference, ActivationFixture, - CompareActivationTest, ActivationFunction::Abs, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSqrtActivationWithReference, PositiveActivationFixture, - CompareActivationTest, ActivationFunction::Sqrt, 5u) - -ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSquareActivationWithReference, ActivationFixture, - CompareActivationTest, ActivationFunction::Square, 5u) - -#endif - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/cl/test/ClMemCopyTests.cpp b/src/backends/cl/test/ClMemCopyTests.cpp deleted file mode 100644 index 3cd9af7910..0000000000 --- a/src/backends/cl/test/ClMemCopyTests.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClWorkloadFactoryHelper.hpp" - -#include <cl/ClWorkloadFactory.hpp> -#include <aclCommon/test/MemCopyTestImpl.hpp> - -#include <reference/RefWorkloadFactory.hpp> -#include <reference/test/RefWorkloadFactoryHelper.hpp> - -#include <boost/test/unit_test.hpp> - -BOOST_AUTO_TEST_SUITE(ClMemCopy) - -BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpu) -{ - LayerTestResult<float, 4> result = - MemCopyTest<armnn::RefWorkloadFactory, armnn::ClWorkloadFactory, armnn::DataType::Float32>(false); - BOOST_TEST(CompareTensors(result.output, result.outputExpected)); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpu) -{ - LayerTestResult<float, 4> result = - MemCopyTest<armnn::ClWorkloadFactory, armnn::RefWorkloadFactory, armnn::DataType::Float32>(false); - BOOST_TEST(CompareTensors(result.output, result.outputExpected)); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpuWithSubtensors) -{ - LayerTestResult<float, 4> result = - MemCopyTest<armnn::RefWorkloadFactory, armnn::ClWorkloadFactory, armnn::DataType::Float32>(true); - BOOST_TEST(CompareTensors(result.output, result.outputExpected)); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpuWithSubtensors) -{ - LayerTestResult<float, 4> result = - MemCopyTest<armnn::ClWorkloadFactory, armnn::RefWorkloadFactory, armnn::DataType::Float32>(true); - BOOST_TEST(CompareTensors(result.output, result.outputExpected)); -} - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/cl/test/ClOptimizedNetworkTests.cpp b/src/backends/cl/test/ClOptimizedNetworkTests.cpp deleted file mode 100644 index c2a8005e8a..0000000000 --- a/src/backends/cl/test/ClOptimizedNetworkTests.cpp +++ /dev/null @@ -1,103 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT
-//
-
-#include "ClWorkloadFactoryHelper.hpp"
-
-#include <Network.hpp>
-
-#include <test/GraphUtils.hpp>
-
-#include <cl/ClWorkloadFactory.hpp>
-
-#include <boost/test/unit_test.hpp>
-
-BOOST_AUTO_TEST_SUITE(ClOptimizedNetwork)
-
-BOOST_AUTO_TEST_CASE(OptimizeValidateGpuDeviceSupportLayerNoFallback)
-{
-    // Build up the structure of the network.
-    armnn::INetworkPtr net(armnn::INetwork::Create());
-
-    armnn::IConnectableLayer* input = net->AddInputLayer(0);
-    armnn::IConnectableLayer* output = net->AddOutputLayer(0);
-
-    input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
-    input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
-
-    armnn::IRuntime::CreationOptions options;
-    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
-
-    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc };
-    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
-    BOOST_CHECK(optNet);
-    // Validate the workloads.
-    armnn::ClWorkloadFactory fact =
-        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
-    for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
-    {
-        BOOST_CHECK(layer->GetBackendId() == armnn::Compute::GpuAcc);
-        BOOST_CHECK_NO_THROW(
-            layer->CreateWorkload(fact));
-    }
-}
-
-BOOST_AUTO_TEST_CASE(FP16TurboModeTestOnGpuAcc)
-{
-    // Checks that, when FP16 turbo mode is set, the optimizer converts the FP32 network to FP16:
-    // it adds an Fp32ToFp16 conversion layer after the InputLayer and an Fp16ToFp32 conversion
-    // layer before the OutputLayer, and checks whether each remaining layer is supported in FP16.
-    // Layers that are not supported in FP16 keep running in FP32, with conversion layers inserted
-    // before and after them; any resulting pairs of inverse conversion layers are removed by the
-    // optimizer. (FloorLayer, for example, is not currently supported in FP16, so it would roll
-    // back to FP32 and its inverse conversion layers would be removed.)
-    armnn::Network net;
-
-    // Defines layers.
-    auto input = net.AddInputLayer(0, "input layer");
-    // ReLu1
-    armnn::ActivationDescriptor activation1Descriptor;
-    activation1Descriptor.m_Function = armnn::ActivationFunction::BoundedReLu;
-    activation1Descriptor.m_A = 1.f;
-    activation1Descriptor.m_B = -1.f;
-    auto activation = net.AddActivationLayer(activation1Descriptor, "activation layer");
-    auto output = net.AddOutputLayer(0, "output layer");
-
-    // Connects layers.
-    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
-    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));
-
-    armnn::TensorShape shape({4});
-    armnn::TensorInfo info(shape, armnn::DataType::Float32);
-    input->GetOutputSlot(0).SetTensorInfo(info);
-    activation->GetOutputSlot(0).SetTensorInfo(info);
-
-    armnn::IRuntime::CreationOptions options;
-    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
-
-    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
-
-    armnn::OptimizerOptions optimizerOptions;
-    optimizerOptions.m_ReduceFp32ToFp16 = true;
-
-    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(
-        net, backends, runtime->GetDeviceSpec(), optimizerOptions);
-
-    const armnn::Graph& graph = static_cast<armnn::OptimizedNetwork*>(optimizedNet.get())->GetGraph();
-
-    // Tests that all layers are present in the graph.
-    BOOST_TEST(graph.GetNumLayers() == 5);
-
-    // Tests that the vertices exist and have correct names. 
-    BOOST_TEST(GraphHasNamedLayer(graph, "input layer"));
-    BOOST_TEST(GraphHasNamedLayer(graph, "convert_fp32_to_fp16-0-input layer"));
-    BOOST_TEST(GraphHasNamedLayer(graph, "activation layer"));
-    BOOST_TEST(GraphHasNamedLayer(graph, "convert_fp16_to_fp32-0-output layer"));
-    BOOST_TEST(GraphHasNamedLayer(graph, "output layer"));
-}
-
-BOOST_AUTO_TEST_SUITE_END()
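For reference, the FP16 turbo path exercised by FP16TurboModeTestOnGpuAcc above is driven entirely by one optimizer option. Below is a minimal sketch of enabling it through the public armnn API already used in these tests; the free function OptimizeWithFp16Turbo is illustrative only, not part of the deleted sources.

#include <armnn/BackendId.hpp>
#include <armnn/INetwork.hpp>
#include <armnn/IRuntime.hpp>

#include <vector>

// Sketch: run the optimizer with the FP32 -> FP16 reduction pass enabled.
// Layers without FP16 support stay in FP32, with conversion layers inserted
// around them, which is exactly what the test above verifies.
armnn::IOptimizedNetworkPtr OptimizeWithFp16Turbo(const armnn::INetwork& network,
                                                  const armnn::IRuntime& runtime)
{
    armnn::OptimizerOptions optimizerOptions;
    optimizerOptions.m_ReduceFp32ToFp16 = true;

    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc };
    return armnn::Optimize(network, backends, runtime.GetDeviceSpec(), optimizerOptions);
}

The returned network can then be loaded with IRuntime::LoadNetwork, exactly as the runtime tests below do.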
diff --git a/src/backends/cl/test/ClRuntimeTests.cpp b/src/backends/cl/test/ClRuntimeTests.cpp
deleted file mode 100644
index 9aa36173d0..0000000000
--- a/src/backends/cl/test/ClRuntimeTests.cpp
+++ /dev/null
@@ -1,157 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include <test/RuntimeTests.hpp>
-
-#include <LeakChecking.hpp>
-
-#include <backendsCommon/test/RuntimeTestImpl.hpp>
-#include <test/ProfilingTestUtils.hpp>
-
-#include <boost/core/ignore_unused.hpp>
-#include <boost/test/unit_test.hpp>
-
-#ifdef WITH_VALGRIND
-#include <valgrind/memcheck.h>
-#endif
-
-BOOST_AUTO_TEST_SUITE(ClRuntime)
-
-BOOST_AUTO_TEST_CASE(RuntimeValidateGpuDeviceSupportLayerNoFallback)
-{
-    // build up the structure of the network
-    armnn::INetworkPtr net(armnn::INetwork::Create());
-
-    armnn::IConnectableLayer* input = net->AddInputLayer(0);
-    armnn::IConnectableLayer* output = net->AddOutputLayer(0);
-
-    input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
-    input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
-
-    armnn::IRuntime::CreationOptions options;
-    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
-
-    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc };
-    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
-    BOOST_CHECK(optNet);
-
-    // Load it into the runtime. It should succeed.
-    armnn::NetworkId netId;
-    BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == armnn::Status::Success);
-}
-
-#ifdef ARMNN_LEAK_CHECKING_ENABLED
-BOOST_AUTO_TEST_CASE(RuntimeMemoryLeaksGpuAcc)
-{
-    BOOST_TEST(ARMNN_LEAK_CHECKER_IS_ACTIVE());
-    armnn::IRuntime::CreationOptions options;
-    armnn::Runtime runtime(options);
-    armnn::RuntimeLoadedNetworksReserve(&runtime);
-
-    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
-    {
-        // Do a warmup of this so we make sure that all one-time
-        // initialization happens before we do the leak checking.
-        CreateAndDropDummyNetwork(backends, runtime);
-    }
-
-    {
-        ARMNN_SCOPED_LEAK_CHECKER("LoadAndUnloadNetworkGpuAcc");
-        BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE());
-        // In the second run we check for all remaining memory
-        // in use after the network was unloaded. If there is any
-        // then it will be treated as a memory leak.
-        CreateAndDropDummyNetwork(backends, runtime);
-        BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE());
-        BOOST_TEST(ARMNN_BYTES_LEAKED_IN_SCOPE() == 0);
-        BOOST_TEST(ARMNN_OBJECTS_LEAKED_IN_SCOPE() == 0);
-    }
-}
-#endif
-
-// Note: this part of the code is due to be removed when we fully trust the gperftools based results.
-#if defined(WITH_VALGRIND)
-BOOST_AUTO_TEST_CASE(RuntimeMemoryUsage)
-{
-    // From documentation:
-
-    // This means that no pointer to the block can be found. The block is classified as "lost",
-    // because the programmer could not possibly have freed it at program exit, since no pointer to it exists.
-    unsigned long leakedBefore = 0;
-    unsigned long leakedAfter = 0;
-
-    // A start-pointer or chain of start-pointers to the block is found. Since the block is still pointed at,
-    // the programmer could, at least in principle, have freed it before program exit.
-    // We want to test this in case memory is not freed as early as it could have been.
-    unsigned long reachableBefore = 0;
-    unsigned long reachableAfter = 0;
-
-    // Needed as out params but we don't test them.
-    unsigned long dubious = 0;
-    unsigned long suppressed = 0;
-
-    // Ensure that the runtime is large enough before checking for memory leaks.
-    // Otherwise, when loading the network, it will automatically reserve memory that won't be released
-    // until destruction.
-    armnn::NetworkId networkIdentifier;
-    armnn::IRuntime::CreationOptions options;
-    armnn::Runtime runtime(options);
-    armnn::RuntimeLoadedNetworksReserve(&runtime);
-
-    // Checks for leaks before we load the network and record them so that we can see the delta after unloading.
-    VALGRIND_DO_QUICK_LEAK_CHECK;
-    VALGRIND_COUNT_LEAKS(leakedBefore, dubious, reachableBefore, suppressed);
-
-    // build a mock-network and load it into the runtime
-    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
-    {
-        armnn::TensorInfo inputTensorInfo(armnn::TensorShape({ 7, 7 }), armnn::DataType::Float32);
-        armnn::TensorInfo outputTensorInfo(armnn::TensorShape({ 7, 7 }), armnn::DataType::Float32);
-
-        armnn::INetworkPtr mockNetwork(armnn::INetwork::Create());
-
-        armnn::IConnectableLayer* input = mockNetwork->AddInputLayer(0, "input");
-        armnn::IConnectableLayer* layer = mockNetwork->AddActivationLayer(armnn::ActivationDescriptor(), "test");
-        armnn::IConnectableLayer* output = mockNetwork->AddOutputLayer(0, "output");
-
-        input->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
-        layer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
-
-        // Sets the tensors in the network.
-        input->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
-        layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
-
-        // optimize the network
-        armnn::IOptimizedNetworkPtr optNet = Optimize(*mockNetwork, backends, runtime.GetDeviceSpec());
-
-        runtime.LoadNetwork(networkIdentifier, std::move(optNet));
-    }
-
-    runtime.UnloadNetwork(networkIdentifier);
-
-    VALGRIND_DO_ADDED_LEAK_CHECK;
-    VALGRIND_COUNT_LEAKS(leakedAfter, dubious, reachableAfter, suppressed);
-
-    // If we're not running under Valgrind, these vars will have been initialised to 0, so this will always pass.
-    BOOST_TEST(leakedBefore == leakedAfter);
-
-    // Allow a reasonable threshold on the reachable bytes before and after running valgrind with the
-    // ACL clear cache function.
-    // TODO Threshold set to 80k until the root cause of the memory leakage is found and fixed. Revert threshold
-    // value to 1024 when fixed.
-    BOOST_TEST(static_cast<long>(reachableAfter) - static_cast<long>(reachableBefore) < 81920);
-
-    // These are needed because VALGRIND_COUNT_LEAKS is a macro that assigns to the parameters
-    // so they are assigned to, but still considered unused, causing a warning.
-    boost::ignore_unused(dubious);
-    boost::ignore_unused(suppressed);
-}
-#endif
-
-BOOST_AUTO_TEST_CASE(ProfilingPostOptimisationStructureGpuAcc)
-{
-    VerifyPostOptimisationStructureTestImpl(armnn::Compute::GpuAcc);
-}
-
-BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/cl/test/ClWorkloadFactoryHelper.hpp b/src/backends/cl/test/ClWorkloadFactoryHelper.hpp
deleted file mode 100644
index 1dfba7573b..0000000000
--- a/src/backends/cl/test/ClWorkloadFactoryHelper.hpp
+++ /dev/null
@@ -1,38 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <armnn/backends/IBackendInternal.hpp> -#include <armnn/backends/IMemoryManager.hpp> -#include <backendsCommon/test/WorkloadFactoryHelper.hpp> - -#include <cl/ClBackend.hpp> -#include <cl/ClWorkloadFactory.hpp> - -#include <boost/polymorphic_pointer_cast.hpp> - -namespace -{ - -template<> -struct WorkloadFactoryHelper<armnn::ClWorkloadFactory> -{ - static armnn::IBackendInternal::IMemoryManagerSharedPtr GetMemoryManager() - { - armnn::ClBackend backend; - return backend.CreateMemoryManager(); - } - - static armnn::ClWorkloadFactory GetFactory( - const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) - { - return armnn::ClWorkloadFactory(boost::polymorphic_pointer_downcast<armnn::ClMemoryManager>(memoryManager)); - } -}; - -using ClWorkloadFactoryHelper = WorkloadFactoryHelper<armnn::ClWorkloadFactory>; - -} // anonymous namespace diff --git a/src/backends/cl/test/Fp16SupportTest.cpp b/src/backends/cl/test/Fp16SupportTest.cpp deleted file mode 100644 index ee5163f668..0000000000 --- a/src/backends/cl/test/Fp16SupportTest.cpp +++ /dev/null @@ -1,110 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include <armnn/Descriptors.hpp> -#include <armnn/IRuntime.hpp> -#include <armnn/INetwork.hpp> -#include <Half.hpp> - -#include <Graph.hpp> -#include <Optimizer.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> - -#include <boost/core/ignore_unused.hpp> -#include <boost/test/unit_test.hpp> - -#include <set> - -using namespace armnn; - -BOOST_AUTO_TEST_SUITE(Fp16Support) - -BOOST_AUTO_TEST_CASE(Fp16DataTypeSupport) -{ - Graph graph; - - Layer* const inputLayer1 = graph.AddLayer<InputLayer>(1, "input1"); - Layer* const inputLayer2 = graph.AddLayer<InputLayer>(2, "input2"); - - Layer* const additionLayer = graph.AddLayer<AdditionLayer>("addition"); - Layer* const outputLayer = graph.AddLayer<armnn::OutputLayer>(0, "output"); - - TensorInfo fp16TensorInfo({1, 2, 3, 5}, armnn::DataType::Float16); - inputLayer1->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0)); - inputLayer2->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1)); - additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); - - inputLayer1->GetOutputSlot().SetTensorInfo(fp16TensorInfo); - inputLayer2->GetOutputSlot().SetTensorInfo(fp16TensorInfo); - additionLayer->GetOutputSlot().SetTensorInfo(fp16TensorInfo); - - BOOST_CHECK(inputLayer1->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16); - BOOST_CHECK(inputLayer2->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16); - BOOST_CHECK(additionLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16); -} - -BOOST_AUTO_TEST_CASE(Fp16AdditionTest) -{ - using namespace half_float::literal; - // Create runtime in which test will run - IRuntime::CreationOptions options; - IRuntimePtr runtime(IRuntime::Create(options)); - - // Builds up the structure of the network. 
-    INetworkPtr net(INetwork::Create());
-
-    IConnectableLayer* inputLayer1 = net->AddInputLayer(0);
-    IConnectableLayer* inputLayer2 = net->AddInputLayer(1);
-    IConnectableLayer* additionLayer = net->AddAdditionLayer();
-    IConnectableLayer* outputLayer = net->AddOutputLayer(0);
-
-    inputLayer1->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0));
-    inputLayer2->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1));
-    additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
-
-    // Change the tensor infos to Float16.
-    TensorInfo fp16TensorInfo(TensorShape({4}), DataType::Float16);
-    inputLayer1->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo);
-    inputLayer2->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo);
-    additionLayer->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo);
-
-    // optimize the network
-    std::vector<BackendId> backends = {Compute::GpuAcc};
-    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
-
-    // Loads it into the runtime.
-    NetworkId netId;
-    runtime->LoadNetwork(netId, std::move(optNet));
-
-    std::vector<Half> input1Data
-    {
-        1.0_h, 2.0_h, 3.0_h, 4.0_h
-    };
-
-    std::vector<Half> input2Data
-    {
-        100.0_h, 200.0_h, 300.0_h, 400.0_h
-    };
-
-    InputTensors inputTensors
-    {
-        {0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), input1Data.data())},
-        {1, ConstTensor(runtime->GetInputTensorInfo(netId, 1), input2Data.data())}
-    };
-
-    std::vector<Half> outputData(input1Data.size());
-    OutputTensors outputTensors
-    {
-        {0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
-    };
-
-    // Does the inference.
-    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
-
-    // Checks the results of the addition.
-    BOOST_TEST(outputData == std::vector<Half>({ 101.0_h, 202.0_h, 303.0_h, 404.0_h }));
-}
-
-BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/cl/test/OpenClTimerTest.cpp b/src/backends/cl/test/OpenClTimerTest.cpp
deleted file mode 100644
index 13620c4311..0000000000
--- a/src/backends/cl/test/OpenClTimerTest.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#if (defined(__aarch64__)) || (defined(__x86_64__)) // disable test failing on FireFly/Armv7
-
-#include "ClWorkloadFactoryHelper.hpp"
-
-#include <test/TensorHelpers.hpp>
-
-#include <backendsCommon/CpuTensorHandle.hpp>
-#include <backendsCommon/WorkloadFactory.hpp>
-
-#include <cl/ClContextControl.hpp>
-#include <cl/ClWorkloadFactory.hpp>
-#include <cl/OpenClTimer.hpp>
-
-#include <backendsCommon/test/TensorCopyUtils.hpp>
-#include <backendsCommon/test/WorkloadTestUtils.hpp>
-
-#include <arm_compute/runtime/CL/CLScheduler.h>
-
-#include <boost/format.hpp>
-#include <boost/test/unit_test.hpp>
-
-#include <iostream>
-
-using namespace armnn;
-
-struct OpenClFixture
-{
-    // Initialising ClContextControl to ensure OpenCL is loaded correctly for each test case.
-    // NOTE: Profiling needs to be enabled in ClContextControl to be able to obtain execution
-    // times from OpenClTimer.
-    OpenClFixture() : m_ClContextControl(nullptr, true) {}
-    ~OpenClFixture() {}
-
-    ClContextControl m_ClContextControl;
-};
-
-BOOST_FIXTURE_TEST_SUITE(OpenClTimerBatchNorm, OpenClFixture)
-using FactoryType = ClWorkloadFactory;
-
-BOOST_AUTO_TEST_CASE(OpenClTimerBatchNorm)
-{
-    auto memoryManager = ClWorkloadFactoryHelper::GetMemoryManager();
-    ClWorkloadFactory workloadFactory = ClWorkloadFactoryHelper::GetFactory(memoryManager);
-
-    const unsigned int width = 2;
-    const unsigned int height = 3;
-    const unsigned int channels = 2;
-    const unsigned int num = 1;
-
-    TensorInfo inputTensorInfo( {num, channels, height, width}, DataType::Float32);
-    TensorInfo outputTensorInfo({num, channels, height, width}, DataType::Float32);
-    TensorInfo tensorInfo({channels}, DataType::Float32);
-
-    auto input = MakeTensor<float, 4>(inputTensorInfo,
-        {
-            1.f, 4.f,
-            4.f, 2.f,
-            1.f, 6.f,
-
-            1.f, 1.f,
-            4.f, 1.f,
-            -2.f, 4.f
-        });
-
-    // these values are per-channel of the input
-    auto mean = MakeTensor<float, 1>(tensorInfo, { 3.f, -2.f });
-    auto variance = MakeTensor<float, 1>(tensorInfo, { 4.f, 9.f });
-    auto beta = MakeTensor<float, 1>(tensorInfo, { 3.f, 2.f });
-    auto gamma = MakeTensor<float, 1>(tensorInfo, { 2.f, 1.f });
-
-    std::unique_ptr<ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    BatchNormalizationQueueDescriptor data;
-    WorkloadInfo info;
-    ScopedCpuTensorHandle meanTensor(tensorInfo);
-    ScopedCpuTensorHandle varianceTensor(tensorInfo);
-    ScopedCpuTensorHandle betaTensor(tensorInfo);
-    ScopedCpuTensorHandle gammaTensor(tensorInfo);
-
-    AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]);
-    AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]);
-    AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]);
-    AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]);
-
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-    data.m_Mean = &meanTensor;
-    data.m_Variance = &varianceTensor;
-    data.m_Beta = &betaTensor;
-    data.m_Gamma = &gammaTensor;
-    data.m_Parameters.m_Eps = 0.0f;
-
-    // For each channel:
-    // subtract the mean, divide by the standard deviation (with an epsilon to avoid division by zero),
-    // then multiply by gamma and add beta.
-    std::unique_ptr<IWorkload> workload = workloadFactory.CreateBatchNormalization(data, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-
-    OpenClTimer openClTimer;
-
-    BOOST_CHECK_EQUAL(openClTimer.GetName(), "OpenClKernelTimer");
-
-    // Start the timer
-    openClTimer.Start();
-
-    // Execute the workload
-    workload->Execute();
-
-    // Stop the timer
-    openClTimer.Stop();
-
-    BOOST_CHECK_EQUAL(openClTimer.GetMeasurements().size(), 1);
-
-    BOOST_CHECK_EQUAL(openClTimer.GetMeasurements().front().m_Name,
-                      "OpenClKernelTimer/0: batchnormalization_layer_nchw GWS[1,3,2]");
-
-    BOOST_CHECK(openClTimer.GetMeasurements().front().m_Value > 0);
-
-}
-
-BOOST_AUTO_TEST_SUITE_END()
-
-#endif //aarch64 or x86_64
diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt
deleted file mode 100644
index de62ca9496..0000000000
--- a/src/backends/cl/workloads/CMakeLists.txt
+++ /dev/null
@@ -1,107 +0,0 @@
-#
-# Copyright © 2017 Arm Ltd. All rights reserved.
-# SPDX-License-Identifier: MIT -# - -list(APPEND armnnClBackendWorkloads_sources - ClAbsWorkload.cpp - ClAbsWorkload.hpp - ClActivationWorkload.cpp - ClActivationWorkload.hpp - ClAdditionWorkload.cpp - ClAdditionWorkload.hpp - ClArgMinMaxWorkload.cpp - ClArgMinMaxWorkload.hpp - ClBatchNormalizationFloatWorkload.cpp - ClBatchNormalizationFloatWorkload.hpp - ClBatchToSpaceNdWorkload.cpp - ClBatchToSpaceNdWorkload.hpp - ClConcatWorkload.cpp - ClConcatWorkload.hpp - ClConstantWorkload.cpp - ClConstantWorkload.hpp - ClConvertFp16ToFp32Workload.cpp - ClConvertFp16ToFp32Workload.hpp - ClConvertFp32ToFp16Workload.cpp - ClConvertFp32ToFp16Workload.hpp - ClConvolution2dWorkload.cpp - ClConvolution2dWorkload.hpp - ClDepthToSpaceWorkload.cpp - ClDepthToSpaceWorkload.hpp - ClDepthwiseConvolutionWorkload.cpp - ClDepthwiseConvolutionWorkload.hpp - ClDequantizeWorkload.cpp - ClDequantizeWorkload.hpp - ClDivisionFloatWorkload.cpp - ClDivisionFloatWorkload.hpp - ClFloorFloatWorkload.cpp - ClFloorFloatWorkload.hpp - ClFullyConnectedWorkload.cpp - ClFullyConnectedWorkload.hpp - ClGreaterWorkload.cpp - ClGreaterWorkload.hpp - ClInstanceNormalizationWorkload.cpp - ClInstanceNormalizationWorkload.hpp - ClL2NormalizationFloatWorkload.cpp - ClL2NormalizationFloatWorkload.hpp - ClLstmFloatWorkload.cpp - ClLstmFloatWorkload.hpp - ClMaximumWorkload.cpp - ClMaximumWorkload.hpp - ClMeanWorkload.cpp - ClMeanWorkload.hpp - ClMinimumWorkload.cpp - ClMinimumWorkload.hpp - ClMultiplicationWorkload.cpp - ClMultiplicationWorkload.hpp - ClNormalizationFloatWorkload.cpp - ClNormalizationFloatWorkload.hpp - ClPadWorkload.cpp - ClPadWorkload.hpp - ClPermuteWorkload.cpp - ClPermuteWorkload.hpp - ClPooling2dWorkload.cpp - ClPooling2dWorkload.hpp - ClPreluWorkload.cpp - ClPreluWorkload.hpp - ClQuantizedLstmWorkload.cpp - ClQuantizedLstmWorkload.hpp - ClQuantizeWorkload.cpp - ClQuantizeWorkload.hpp - ClReshapeWorkload.cpp - ClReshapeWorkload.hpp - ClResizeWorkload.cpp - ClResizeWorkload.hpp - ClRsqrtWorkload.cpp - ClRsqrtWorkload.hpp - ClSliceWorkload.cpp - ClSliceWorkload.hpp - ClSoftmaxBaseWorkload.cpp - ClSoftmaxBaseWorkload.hpp - ClSoftmaxFloatWorkload.cpp - ClSoftmaxFloatWorkload.hpp - ClSoftmaxUint8Workload.cpp - ClSoftmaxUint8Workload.hpp - ClSpaceToBatchNdWorkload.hpp - ClSpaceToBatchNdWorkload.cpp - ClSpaceToDepthWorkload.cpp - ClSpaceToDepthWorkload.hpp - ClSplitterWorkload.cpp - ClSplitterWorkload.hpp - ClStackWorkload.cpp - ClStackWorkload.hpp - ClStridedSliceWorkload.cpp - ClStridedSliceWorkload.hpp - ClSubtractionWorkload.cpp - ClSubtractionWorkload.hpp - ClTransposeConvolution2dWorkload.cpp - ClTransposeConvolution2dWorkload.hpp - ClWorkloads.hpp - ClWorkloadUtils.hpp -) - -add_library(armnnClBackendWorkloads OBJECT ${armnnClBackendWorkloads_sources}) -target_include_directories(armnnClBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) -target_include_directories(armnnClBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) -target_include_directories(armnnClBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src/backends) -target_include_directories(armnnClBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src/profiling) diff --git a/src/backends/cl/workloads/ClAbsWorkload.cpp b/src/backends/cl/workloads/ClAbsWorkload.cpp deleted file mode 100644 index 058c453c6b..0000000000 --- a/src/backends/cl/workloads/ClAbsWorkload.cpp +++ /dev/null @@ -1,44 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClAbsWorkload.hpp" - -#include "ClWorkloadUtils.hpp" - -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include <cl/ClTensorHandle.hpp> - -#include <boost/cast.hpp> - -namespace armnn -{ - -arm_compute::Status ClAbsWorkloadValidate(const TensorInfo& input, const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - return arm_compute::CLAbsLayer::validate(&aclInput, &aclOutput); -} - -ClAbsWorkload::ClAbsWorkload(const AbsQueueDescriptor& descriptor, const WorkloadInfo& info) - : BaseWorkload<AbsQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClAbsWorkload", 1, 1); - - arm_compute::ICLTensor& input = boost::polymorphic_downcast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = boost::polymorphic_downcast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_AbsLayer.configure(&input, &output); -} - -void ClAbsWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClAbsWorkload_Execute"); - RunClFunction(m_AbsLayer, CHECK_LOCATION()); -} - -} // namespace armnn diff --git a/src/backends/cl/workloads/ClAbsWorkload.hpp b/src/backends/cl/workloads/ClAbsWorkload.hpp deleted file mode 100644 index 763cafcfbd..0000000000 --- a/src/backends/cl/workloads/ClAbsWorkload.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/core/Error.h> -#include <arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h> - -namespace armnn -{ - -arm_compute::Status ClAbsWorkloadValidate(const TensorInfo& input, const TensorInfo& output); - -class ClAbsWorkload : public BaseWorkload<AbsQueueDescriptor> -{ -public: - ClAbsWorkload(const AbsQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable arm_compute::CLAbsLayer m_AbsLayer; -}; - -} // namespace armnn diff --git a/src/backends/cl/workloads/ClActivationWorkload.cpp b/src/backends/cl/workloads/ClActivationWorkload.cpp deleted file mode 100644 index 685652036b..0000000000 --- a/src/backends/cl/workloads/ClActivationWorkload.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClActivationWorkload.hpp" - -#include "ClWorkloadUtils.hpp" - -#include <backendsCommon/CpuTensorHandle.hpp> -#include <cl/ClLayerSupport.hpp> -#include <cl/ClTensorHandle.hpp> -#include <aclCommon/ArmComputeUtils.hpp> - -namespace armnn -{ -arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const ActivationDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - const arm_compute::ActivationLayerInfo activationLayerInfo = - ConvertActivationDescriptorToAclActivationLayerInfo(descriptor); - - return arm_compute::CLActivationLayer::validate(&aclInput, - &aclOutput, - activationLayerInfo); -} - -ClActivationWorkload::ClActivationWorkload(const ActivationQueueDescriptor& descriptor, - const WorkloadInfo& info) - : BaseWorkload<ActivationQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClActivationWorkload", 1, 1); - - const arm_compute::ActivationLayerInfo activationLayerInfo = - ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters); - - arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_ActivationLayer.configure(&input, &output, activationLayerInfo); -} - -void ClActivationWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClActivationWorkload_Execute"); - RunClFunction(m_ActivationLayer, CHECK_LOCATION()); -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClActivationWorkload.hpp b/src/backends/cl/workloads/ClActivationWorkload.hpp deleted file mode 100644 index 35166332e6..0000000000 --- a/src/backends/cl/workloads/ClActivationWorkload.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLActivationLayer.h> - -namespace armnn -{ -arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const ActivationDescriptor& descriptor); - -class ClActivationWorkload : public BaseWorkload<ActivationQueueDescriptor> -{ -public: - ClActivationWorkload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLActivationLayer m_ActivationLayer; -}; - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClAdditionWorkload.cpp b/src/backends/cl/workloads/ClAdditionWorkload.cpp deleted file mode 100644 index 18e2400ccd..0000000000 --- a/src/backends/cl/workloads/ClAdditionWorkload.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClAdditionWorkload.hpp" - -#include <cl/ClTensorHandle.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; - -ClAdditionWorkload::ClAdditionWorkload(const AdditionQueueDescriptor& descriptor, - const WorkloadInfo& info) - : BaseWorkload<AdditionQueueDescriptor>(descriptor, info) -{ - this->m_Data.ValidateInputsOutputs("ClAdditionWorkload", 2, 1); - - arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[1])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor(); - m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy); -} - -void ClAdditionWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClAdditionWorkload_Execute"); - RunClFunction(m_Layer, CHECK_LOCATION()); -} - -arm_compute::Status ClAdditionValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0); - const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - const arm_compute::Status aclStatus = arm_compute::CLArithmeticAddition::validate(&aclInput0Info, - &aclInput1Info, - &aclOutputInfo, - g_AclConvertPolicy); - - return aclStatus; -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClAdditionWorkload.hpp b/src/backends/cl/workloads/ClAdditionWorkload.hpp deleted file mode 100644 index 62bd0ae20b..0000000000 --- a/src/backends/cl/workloads/ClAdditionWorkload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLElementwiseOperations.h> - -namespace armnn -{ - -class ClAdditionWorkload : public BaseWorkload<AdditionQueueDescriptor> -{ -public: - ClAdditionWorkload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - mutable arm_compute::CLArithmeticAddition m_Layer; -}; - -arm_compute::Status ClAdditionValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output); -} //namespace armnn diff --git a/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp b/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp deleted file mode 100644 index a79a7b286d..0000000000 --- a/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp +++ /dev/null @@ -1,82 +0,0 @@ -// -// Copyright © 2019 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClArgMinMaxWorkload.hpp" -#include "ClWorkloadUtils.hpp" - -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include <backendsCommon/CpuTensorHandle.hpp> - -#include <armnnUtils/TensorUtils.hpp> - -#include <cl/ClTensorHandle.hpp> -#include <cl/ClLayerSupport.hpp> - -namespace -{ -unsigned int CalcAclAxis(unsigned int numDimensions, unsigned int axisIndex) -{ - return (numDimensions - axisIndex) - 1; -} - -} //namespace - -namespace armnn -{ - -arm_compute::Status ClArgMinMaxWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const ArgMinMaxDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - auto numDims = input.GetNumDimensions(); - auto unsignedAxis = armnnUtils::GetUnsignedAxis(numDims, descriptor.m_Axis); - int aclAxis = boost::numeric_cast<int>(CalcAclAxis(numDims, unsignedAxis)); - - if (descriptor.m_Function == ArgMinMaxFunction::Max) - { - return arm_compute::CLArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput, - arm_compute::ReductionOperation::ARG_IDX_MAX); - } - else - { - return arm_compute::CLArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput, - arm_compute::ReductionOperation::ARG_IDX_MIN); - } -} - - -ClArgMinMaxWorkload::ClArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descriptor, - const WorkloadInfo& info) - : BaseWorkload<ArgMinMaxQueueDescriptor>(descriptor, info) -{ - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor(); - - auto numDims = info.m_InputTensorInfos[0].GetNumDimensions(); - auto unsignedAxis = armnnUtils::GetUnsignedAxis(numDims, m_Data.m_Parameters.m_Axis); - int aclAxis = boost::numeric_cast<int>(CalcAclAxis(numDims, unsignedAxis)); - - if (m_Data.m_Parameters.m_Function == ArgMinMaxFunction::Max) - { - m_ArgMinMaxLayer.configure(&input, aclAxis, &output, arm_compute::ReductionOperation::ARG_IDX_MAX); - } - else - { - m_ArgMinMaxLayer.configure(&input, aclAxis, &output, arm_compute::ReductionOperation::ARG_IDX_MIN); - } -} - -void ClArgMinMaxWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClArgMinMaxWorkload_Execute"); - RunClFunction(m_ArgMinMaxLayer, CHECK_LOCATION()); -} - -} //namespace armnn - diff --git a/src/backends/cl/workloads/ClArgMinMaxWorkload.hpp b/src/backends/cl/workloads/ClArgMinMaxWorkload.hpp deleted file mode 100644 index 54f28e6175..0000000000 --- a/src/backends/cl/workloads/ClArgMinMaxWorkload.hpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// Copyright © 2019 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/core/Error.h> -#include <arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h> - -namespace armnn -{ - -arm_compute::Status ClArgMinMaxWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const ArgMinMaxDescriptor& descriptor); - -class ClArgMinMaxWorkload : public BaseWorkload<ArgMinMaxQueueDescriptor> -{ -public: - ClArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable arm_compute::CLArgMinMaxLayer m_ArgMinMaxLayer; -}; - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp deleted file mode 100644 index fa0be85100..0000000000 --- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp +++ /dev/null @@ -1,106 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClBatchNormalizationFloatWorkload.hpp" -#include <cl/ClTensorHandle.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> -#include <cl/ClLayerSupport.hpp> - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& mean, - const TensorInfo& var, - const TensorInfo& beta, - const TensorInfo& gamma, - const BatchNormalizationDescriptor &desc) -{ - const arm_compute::TensorInfo aclInputInfo = - armcomputetensorutils::BuildArmComputeTensorInfo(input, desc.m_DataLayout); - const arm_compute::TensorInfo aclOutputInfo = - armcomputetensorutils::BuildArmComputeTensorInfo(output, desc.m_DataLayout); - const arm_compute::TensorInfo aclMeanInfo = - armcomputetensorutils::BuildArmComputeTensorInfo(mean, desc.m_DataLayout); - const arm_compute::TensorInfo aclVarInfo = - armcomputetensorutils::BuildArmComputeTensorInfo(var, desc.m_DataLayout); - const arm_compute::TensorInfo aclBetaInfo = - armcomputetensorutils::BuildArmComputeTensorInfo(beta, desc.m_DataLayout); - const arm_compute::TensorInfo aclGammaInfo = - armcomputetensorutils::BuildArmComputeTensorInfo(gamma, desc.m_DataLayout); - - return arm_compute::CLBatchNormalizationLayer::validate(&aclInputInfo, - &aclOutputInfo, - &aclMeanInfo, - &aclVarInfo, - &aclBetaInfo, - &aclGammaInfo, - desc.m_Eps); -} - -ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload( - const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) - : FloatWorkload<BatchNormalizationQueueDescriptor>(descriptor, info) -{ - m_Mean = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_Mean, m_Data.m_Mean->GetTensorInfo()); - - m_Variance = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_Variance, m_Data.m_Variance->GetTensorInfo()); - - m_Gamma = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_Gamma, m_Data.m_Gamma->GetTensorInfo()); - - m_Beta = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo()); - - m_Data.ValidateInputsOutputs("ClBatchNormalizationFloatWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = 
static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); - input.info()->set_data_layout(aclDataLayout); - output.info()->set_data_layout(aclDataLayout); - - m_Layer.configure(&input, - &output, - m_Mean.get(), - m_Variance.get(), - m_Beta.get(), - m_Gamma.get(), - m_Data.m_Parameters.m_Eps); - - InitializeArmComputeClTensorData(*m_Mean, m_Data.m_Mean); - InitializeArmComputeClTensorData(*m_Variance, m_Data.m_Variance); - InitializeArmComputeClTensorData(*m_Beta, m_Data.m_Beta); - InitializeArmComputeClTensorData(*m_Gamma, m_Data.m_Gamma); - - // Force Compute Library to perform the necessary copying and reshaping, after which - // delete all the input tensors that will no longer be needed - m_Layer.prepare(); - FreeUnusedTensors(); -} - -void ClBatchNormalizationFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClBatchNormalizationFloatWorkload_Execute"); - RunClFunction(m_Layer, CHECK_LOCATION()); -} - -void ClBatchNormalizationFloatWorkload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_Mean); - FreeTensorIfUnused(m_Variance); - FreeTensorIfUnused(m_Gamma); - FreeTensorIfUnused(m_Beta); -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp deleted file mode 100644 index e94bef20ac..0000000000 --- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp +++ /dev/null @@ -1,47 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/CLTensor.h> -#include <arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h> - -namespace armnn -{ - -arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& mean, - const TensorInfo& var, - const TensorInfo& beta, - const TensorInfo& gamma, - const BatchNormalizationDescriptor& desc); - -class ClBatchNormalizationFloatWorkload : public FloatWorkload<BatchNormalizationQueueDescriptor> -{ -public: - ClBatchNormalizationFloatWorkload(const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); - - using FloatWorkload<BatchNormalizationQueueDescriptor>::FloatWorkload; - void Execute() const override; - -private: - mutable arm_compute::CLBatchNormalizationLayer m_Layer; - - std::unique_ptr<arm_compute::CLTensor> m_Mean; - std::unique_ptr<arm_compute::CLTensor> m_Variance; - std::unique_ptr<arm_compute::CLTensor> m_Gamma; - std::unique_ptr<arm_compute::CLTensor> m_Beta; - - void FreeUnusedTensors(); -}; - -} //namespace armnn - - - - diff --git a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp deleted file mode 100644 index a714e031e4..0000000000 --- a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp +++ /dev/null @@ -1,64 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClBatchToSpaceNdWorkload.hpp" - -#include <cl/ClTensorHandle.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -ClBatchToSpaceNdWorkload::ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& desc, - const WorkloadInfo& info) - : BaseWorkload<BatchToSpaceNdQueueDescriptor>(desc, info) -{ - m_Data.ValidateInputsOutputs("ClBatchToSpaceNdWorkload", 1, 1); - - arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - input.info()->set_data_layout(aclDataLayout); - - // ArmNN blockShape is [H, W] Cl asks for W, H - int32_t blockHeight = boost::numeric_cast<int32_t>(desc.m_Parameters.m_BlockShape[0]); - int32_t blockWidth = boost::numeric_cast<int32_t>(desc.m_Parameters.m_BlockShape[1]); - - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - output.info()->set_data_layout(aclDataLayout); - - m_Layer.configure(&input, blockWidth, blockHeight, &output); -} - -void ClBatchToSpaceNdWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClBatchToSpaceNdWorkload_Execute"); - RunClFunction(m_Layer, CHECK_LOCATION()); -} - -arm_compute::Status ClBatchToSpaceNdWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const BatchToSpaceNdDescriptor& desc) { - DataLayout dataLayout = desc.m_DataLayout; - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout); - - // ArmNN blockShape is [H, W] Cl asks for W, H - int32_t blockHeight = boost::numeric_cast<int32_t>(desc.m_BlockShape[0]); - int32_t blockWidth = boost::numeric_cast<int32_t>(desc.m_BlockShape[1]); - - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout); - - const arm_compute::Status aclStatus = arm_compute::CLBatchToSpaceLayer::validate(&aclInputInfo, - blockWidth, - blockHeight, - &aclOutputInfo); - return aclStatus; -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp deleted file mode 100644 index 881b294097..0000000000 --- a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> -#include <arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h> - -namespace armnn -{ - -arm_compute::Status ClBatchToSpaceNdWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const BatchToSpaceNdDescriptor& desc); - -class ClBatchToSpaceNdWorkload : public BaseWorkload<BatchToSpaceNdQueueDescriptor> -{ -public: - ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - - mutable arm_compute::CLBatchToSpaceLayer m_Layer; -}; - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClConcatWorkload.cpp b/src/backends/cl/workloads/ClConcatWorkload.cpp deleted file mode 100644 index 5370466163..0000000000 --- a/src/backends/cl/workloads/ClConcatWorkload.cpp +++ /dev/null @@ -1,102 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#include "ClConcatWorkload.hpp" -#include "ClWorkloadUtils.hpp" -#include <aclCommon/ArmComputeTensorUtils.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> -#include <cl/ClTensorHandle.hpp> -#include <cl/ClLayerSupport.hpp> - -#include <arm_compute/core/Types.h> -#include <arm_compute/runtime/CL/functions/CLConcatenateLayer.h> - -#include <boost/polymorphic_pointer_cast.hpp> - -namespace armnn -{ -using namespace armcomputetensorutils; - -namespace -{ -size_t CalcAxis(const OriginsDescriptor& desc) -{ - return (desc.GetNumDimensions() - desc.GetConcatAxis()) - 1; -} -} //namespace - -arm_compute::Status ClConcatWorkloadValidate(const std::vector<const TensorInfo*>& inputs, - const TensorInfo& output, - const OriginsDescriptor& descriptor) -{ - std::vector<arm_compute::TensorInfo> aclInputs; - for (const TensorInfo* input : inputs) - { - arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(*input, armnn::DataLayout::NCHW); - aclInputs.emplace_back(aclInputInfo); - } - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - std::vector<arm_compute::ITensorInfo*> aclInputPtrs; - for (arm_compute::ITensorInfo& input : aclInputs) - { - aclInputPtrs.emplace_back(&input); - } - - size_t aclAxis = CalcAxis(descriptor); - return arm_compute::CLConcatenateLayer::validate(aclInputPtrs, &aclOutputInfo, aclAxis); -} - -ClConcatWorkload::ClConcatWorkload(const ConcatQueueDescriptor& descriptor, const WorkloadInfo& info) -: BaseWorkload<ConcatQueueDescriptor>(descriptor, info) -{ - bool allInputsAreSubtensors = true; - - // Check that all inputs are sub-tensors - for (auto input : descriptor.m_Inputs) - { - if (!input->GetParent()) - { - // Non sub-tensor input found so we need to execute the concat function - allInputsAreSubtensors = false; - break; - } - } - - if (allInputsAreSubtensors) - { - // Can skip configuring the concat function since it's not executed - return; - } - - std::vector<arm_compute::ICLTensor *> aclInputs; - for (auto input : m_Data.m_Inputs) - { - arm_compute::ICLTensor& aclInput = boost::polymorphic_pointer_downcast<IClTensorHandle>(input)->GetTensor(); - aclInputs.emplace_back(&aclInput); - } - arm_compute::ICLTensor& output = boost::polymorphic_pointer_downcast<IClTensorHandle>( - m_Data.m_Outputs[0])->GetTensor(); - - // Create the layer function - auto layer = std::make_unique<arm_compute::CLConcatenateLayer>(); - - // Configure input and output tensors - size_t aclAxis = CalcAxis(descriptor.m_Parameters); - layer->configure(aclInputs, &output, aclAxis); - - // Prepare - layer->prepare(); - m_Layer = std::move(layer); -} - -void ClConcatWorkload::Execute() const -{ - if (m_Layer) - { - ARMNN_SCOPED_PROFILING_EVENT_CL("ClConcatWorkload_Execute"); - m_Layer->run(); - } -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClConcatWorkload.hpp b/src/backends/cl/workloads/ClConcatWorkload.hpp deleted file mode 100644 index 772bc094ea..0000000000 --- a/src/backends/cl/workloads/ClConcatWorkload.hpp +++ /dev/null @@ -1,31 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/core/Error.h> -#include <arm_compute/runtime/IFunction.h> - -namespace armnn -{ - -arm_compute::Status ClConcatWorkloadValidate(const std::vector<const TensorInfo*>& inputs, - const TensorInfo& output, - const OriginsDescriptor& descriptor); - -class ClConcatWorkload : public BaseWorkload<ConcatQueueDescriptor> -{ -public: - ClConcatWorkload(const ConcatQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - mutable std::unique_ptr<arm_compute::IFunction> m_Layer; -}; - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClConstantWorkload.cpp b/src/backends/cl/workloads/ClConstantWorkload.cpp deleted file mode 100644 index 39ae14eaf3..0000000000 --- a/src/backends/cl/workloads/ClConstantWorkload.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClConstantWorkload.hpp" - -#include <Half.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> -#include <cl/ClTensorHandle.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -ClConstantWorkload::ClConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) - : BaseWorkload<ConstantQueueDescriptor>(descriptor, info) - , m_RanOnce(false) -{ -} - -void ClConstantWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClConstantWorkload_Execute"); - - // The intermediate tensor held by the corresponding layer output handler can be initialised with the given data - // on the first inference, then reused for subsequent inferences. - // The initialisation cannot happen at workload construction time since the ACL kernel for the next layer may not - // have been configured at the time. - if (!m_RanOnce) - { - const ConstantQueueDescriptor& data = this->m_Data; - - BOOST_ASSERT(data.m_LayerOutput != nullptr); - arm_compute::CLTensor& output = static_cast<ClTensorHandle*>(data.m_Outputs[0])->GetTensor(); - arm_compute::DataType computeDataType = static_cast<ClTensorHandle*>(data.m_Outputs[0])->GetDataType(); - - switch (computeDataType) - { - case arm_compute::DataType::F16: - { - CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor<Half>()); - break; - } - case arm_compute::DataType::F32: - { - CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor<float>()); - break; - } - case arm_compute::DataType::QASYMM8: - { - CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor<uint8_t>()); - break; - } - default: - { - BOOST_ASSERT_MSG(false, "Unknown data type"); - break; - } - } - - m_RanOnce = true; - } -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClConstantWorkload.hpp b/src/backends/cl/workloads/ClConstantWorkload.hpp deleted file mode 100644 index 75325dc409..0000000000 --- a/src/backends/cl/workloads/ClConstantWorkload.hpp +++ /dev/null @@ -1,23 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -namespace armnn -{ -class ClConstantWorkload : public BaseWorkload<ConstantQueueDescriptor> -{ -public: - ClConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - mutable bool m_RanOnce; -}; - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp deleted file mode 100644 index d2e86f8c94..0000000000 --- a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp +++ /dev/null @@ -1,56 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClConvertFp16ToFp32Workload.hpp" -#include <cl/ClTensorHandle.hpp> - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; - -ClConvertFp16ToFp32Workload::ClConvertFp16ToFp32Workload( - const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info) : - Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>(descriptor, info) -{ - this->m_Data.ValidateInputsOutputs("ClConvertFp16ToFp32Workload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor(); - - m_Layer.configure(&input, &output, g_AclConvertPolicy, 0); -} - -void ClConvertFp16ToFp32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp16ToFp32Workload_Execute"); - RunClFunction(m_Layer, CHECK_LOCATION()); -} - -arm_compute::Status ClConvertFp16ToFp32WorkloadValidate(const TensorInfo& input, const TensorInfo& output) -{ - if (input.GetDataType() != DataType::Float16) - { - return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Input should be Float16"); - } - if (output.GetDataType() != DataType::Float32) - { - return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Output should be Float32"); - } - - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate( - &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0); - - return aclStatus; -} - - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp deleted file mode 100644 index ef5c9b6497..0000000000 --- a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLDepthConvertLayer.h> - -namespace armnn -{ - -class ClConvertFp16ToFp32Workload : public Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor> -{ -public: - - ClConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable arm_compute::CLDepthConvertLayer m_Layer; -}; - -arm_compute::Status ClConvertFp16ToFp32WorkloadValidate(const TensorInfo& input, const TensorInfo& output); - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp deleted file mode 100644 index 3f528a1532..0000000000 --- a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp +++ /dev/null @@ -1,56 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClConvertFp32ToFp16Workload.hpp" -#include <cl/ClTensorHandle.hpp> - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; - -ClConvertFp32ToFp16Workload::ClConvertFp32ToFp16Workload( - const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info) : - Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>(descriptor, info) -{ - this->m_Data.ValidateInputsOutputs("ClConvertFp32ToFp16Workload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor(); - - m_Layer.configure(&input, &output, g_AclConvertPolicy, 0); -} - -void ClConvertFp32ToFp16Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp32ToFp16Workload_Execute"); - RunClFunction(m_Layer, CHECK_LOCATION()); -} - -arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input, const TensorInfo& output) -{ - if (input.GetDataType() != DataType::Float32) - { - return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Input should be Float32"); - } - if (output.GetDataType() != DataType::Float16) - { - return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Output should be Float16"); - } - - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate( - &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0); - - return aclStatus; -} - - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp deleted file mode 100644 index 6e04e39425..0000000000 --- a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLDepthConvertLayer.h> - -namespace armnn -{ - -class ClConvertFp32ToFp16Workload : public Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor> -{ -public: - - ClConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable arm_compute::CLDepthConvertLayer m_Layer; -}; - -arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input, const TensorInfo& output); - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp deleted file mode 100644 index e8af0ee3b7..0000000000 --- a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp +++ /dev/null @@ -1,122 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClConvolution2dWorkload.hpp" - -#include "ClWorkloadUtils.hpp" - -#include <cl/ClLayerSupport.hpp> -#include <cl/ClTensorHandle.hpp> -#include <cl/ClLayerSupport.hpp> -#include <aclCommon/ArmComputeUtils.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> - -#include <arm_compute/runtime/CL/functions/CLConvolutionLayer.h> - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional<TensorInfo>& biases) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); - - const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX, - descriptor.m_DilationY); - - arm_compute::TensorInfo aclBiasesInfo; - arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; - - if (descriptor.m_BiasEnabled) - { - BOOST_ASSERT(biases.has_value()); - - aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout); - optionalAclBiasesInfo = &aclBiasesInfo; - } - - arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor); - - return arm_compute::CLConvolutionLayer::validate(&aclInputInfo, - &aclWeightsInfo, - optionalAclBiasesInfo, - &aclOutputInfo, - layerInfo, - arm_compute::WeightsInfo(), - aclDilationInfo); -} - -ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) - : BaseWorkload<Convolution2dQueueDescriptor>(descriptor, info) - , m_ConvolutionLayer(memoryManager) -{ - // todo: check tensor shapes match. 
- const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); - - m_KernelTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_KernelTensor, weightInfo, m_Data.m_Parameters.m_DataLayout); - - const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(m_Data.m_Parameters.m_DilationX, - m_Data.m_Parameters.m_DilationY); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - m_BiasTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), m_Data.m_Parameters.m_DataLayout); - } - - m_Data.ValidateInputsOutputs("ClConvolution2dWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); - input.info()->set_data_layout(aclDataLayout); - output.info()->set_data_layout(aclDataLayout); - - arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters); - - m_ConvolutionLayer.configure(&input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo, - arm_compute::WeightsInfo(), - aclDilationInfo); - - InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight); - - if (m_BiasTensor) - { - InitializeArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias); - } - - // Force Compute Library to perform the necessary copying and reshaping, after which - // delete all the input tensors that will no longer be needed - m_ConvolutionLayer.prepare(); - FreeUnusedTensors(); -} - -void ClConvolution2dWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvolution2dWorkload_Execute"); - RunClFunction(m_ConvolutionLayer, CHECK_LOCATION()); -} - -void ClConvolution2dWorkload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_KernelTensor); - FreeTensorIfUnused(m_BiasTensor); -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp deleted file mode 100644 index 6d7e9f3ea1..0000000000 --- a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp +++ /dev/null @@ -1,44 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <armnn/Tensor.hpp> -#include <armnn/Descriptors.hpp> - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLConvolutionLayer.h> -#include <arm_compute/runtime/MemoryManagerOnDemand.h> - -#include <memory> - -namespace armnn -{ - -arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional<TensorInfo>& biases); - -class ClConvolution2dWorkload : public BaseWorkload<Convolution2dQueueDescriptor> -{ -public: - ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); - void Execute() const override; - -private: - mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer; - - std::unique_ptr<arm_compute::CLTensor> m_KernelTensor; - std::unique_ptr<arm_compute::CLTensor> m_BiasTensor; - - void FreeUnusedTensors(); -}; - -} //namespace armnn - diff --git a/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp b/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp deleted file mode 100644 index 800a98409e..0000000000 --- a/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// -// Copyright © 2019 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClDepthToSpaceWorkload.hpp" - -#include "ClWorkloadUtils.hpp" - -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include <cl/ClTensorHandle.hpp> - -#include <boost/numeric/conversion/cast.hpp> -#include <boost/polymorphic_pointer_cast.hpp> - -namespace armnn -{ - -using namespace armcomputetensorutils; - -arm_compute::Status ClDepthToSpaceWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const DepthToSpaceDescriptor& desc) -{ - DataLayout dataLayout = desc.m_DataLayout; - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout); - - int32_t blockSize = boost::numeric_cast<int32_t>(desc.m_BlockSize); - - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout); - - const arm_compute::Status aclStatus = arm_compute::CLDepthToSpaceLayer::validate(&aclInputInfo, - &aclOutputInfo, - blockSize); - return aclStatus; -} - -ClDepthToSpaceWorkload::ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& desc, - const WorkloadInfo& info) - : BaseWorkload<DepthToSpaceQueueDescriptor>(desc, info) -{ - m_Data.ValidateInputsOutputs("ClDepthToSpaceWorkload", 1, 1); - - arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); - - arm_compute::ICLTensor& input = - boost::polymorphic_pointer_downcast<IClTensorHandle>(m_Data.m_Inputs[0])->GetTensor(); - input.info()->set_data_layout(aclDataLayout); - - int32_t blockSize = boost::numeric_cast<int32_t>(desc.m_Parameters.m_BlockSize); - - arm_compute::ICLTensor& output = - boost::polymorphic_pointer_downcast<IClTensorHandle>(m_Data.m_Outputs[0])->GetTensor(); - output.info()->set_data_layout(aclDataLayout); - - m_Layer.configure(&input, &output, blockSize); -} - -void ClDepthToSpaceWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthToSpaceWorkload_Execute"); - RunClFunction(m_Layer, CHECK_LOCATION()); -} - -} // namespace armnn diff --git a/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp b/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp deleted file mode 100644 index 
de8b496669..0000000000 --- a/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp +++ /dev/null @@ -1,31 +0,0 @@ -// -// Copyright © 2019 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <armnn/Descriptors.hpp> - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h> - -namespace armnn -{ - -arm_compute::Status ClDepthToSpaceWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const DepthToSpaceDescriptor& desc); - -class ClDepthToSpaceWorkload : public BaseWorkload<DepthToSpaceQueueDescriptor> -{ -public: - ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLDepthToSpaceLayer m_Layer; -}; - -} // namespace armnn diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp deleted file mode 100644 index 858eab4e00..0000000000 --- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp +++ /dev/null @@ -1,156 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClDepthwiseConvolutionWorkload.hpp" - -#include <ResolveType.hpp> -#include "ClWorkloadUtils.hpp" - -#include <aclCommon/ArmComputeUtils.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> -#include <cl/ClTensorHandle.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> -#include <backendsCommon/WorkloadUtils.hpp> - -#include <arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h> - -namespace armnn -{ - -using namespace armcomputetensorutils; - -arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional<TensorInfo>& biases) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - - // ArmNN's weight format is [ M, I, H, W ] - const unsigned int aclDepthMultiplier = weights.GetShape()[0]; - - // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either - // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library - TensorInfo weightsPermuted = ConvertWeightTensorInfoFromArmnnToAcl(weights, descriptor.m_DataLayout); - - // Convert the weights into the compute library format - const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout); - - arm_compute::TensorInfo aclBiasesInfo; - arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; - - if (descriptor.m_BiasEnabled) - { - BOOST_ASSERT(biases.has_value()); - - aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout); - optionalAclBiasesInfo = &aclBiasesInfo; - } - - const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor); - const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D( - descriptor.m_DilationX, - descriptor.m_DilationY); - - return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo, - &aclWeightsInfo, - optionalAclBiasesInfo, - &aclOutputInfo, - aclPadStrideInfo, - aclDepthMultiplier, - arm_compute::ActivationLayerInfo(), - aclDilationInfo); - -} - 
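// The [ M, I, H, W ] -> compute-library weight conversion used in the validate function above
// reduces to a fixed shape mapping. A minimal illustrative helper (hypothetical, not part of
// these sources; assumes ArmNN's TensorShape and DataLayout types from armnn/Tensor.hpp and
// armnn/Types.hpp) makes the mapping explicit:
armnn::TensorShape MakeAclDepthwiseWeightShape(const armnn::TensorShape& weights, // [ M, I, H, W ]
                                               armnn::DataLayout layout)
{
    const unsigned int M = weights[0];
    const unsigned int I = weights[1];
    const unsigned int H = weights[2];
    const unsigned int W = weights[3];
    // NHWC keeps the merged I * M channel dimension last; NCHW keeps it first after the leading 1.
    return layout == armnn::DataLayout::NHWC ? armnn::TensorShape({ 1, H, W, I * M })
                                             : armnn::TensorShape({ 1, I * M, H, W });
}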
-ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload( - const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) - : BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info) -{ - // Allocate a buffer for the swizzling of the weight tensor - std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]); - - // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either - // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library - ConstTensor weightPermuted = ConvertWeightTensorFromArmnnToAcl(m_Data.m_Weight, - m_Data.m_Parameters.m_DataLayout, - permuteBuffer.get()); - - // Convert the weights into the compute library format - m_KernelTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_KernelTensor, weightPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - m_BiasTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), m_Data.m_Parameters.m_DataLayout); - } - - const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D( - m_Data.m_Parameters.m_DilationX, - m_Data.m_Parameters.m_DilationY); - - - std::string name = std::string("ClDepthwiseConvolutionWorkload"); - m_Data.ValidateInputsOutputs(name, 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); - input.info()->set_data_layout(aclDataLayout); - output.info()->set_data_layout(aclDataLayout); - - // ArmNN's weight format is [ M, I, H, W ] - auto& weightInfo = m_Data.m_Weight->GetTensorInfo(); - - // Get the depth multiplier - const unsigned int depthMultiplier = weightInfo.GetShape()[0]; - - arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters); - - m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer>(); - static_cast<arm_compute::CLDepthwiseConvolutionLayer*>(m_DepthwiseConvolutionLayer.get())->configure( - &input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo, - depthMultiplier, - arm_compute::ActivationLayerInfo(), - aclDilationInfo); - - BOOST_ASSERT(m_DepthwiseConvolutionLayer); - - ScopedCpuTensorHandle weightsPermutedHandle(weightPermuted); - InitializeArmComputeClTensorData(*m_KernelTensor, &weightsPermutedHandle); - - if (m_BiasTensor) - { - InitializeArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias); - } - - m_DepthwiseConvolutionLayer->prepare(); - FreeUnusedTensors(); -} - -void ClDepthwiseConvolutionWorkload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_KernelTensor); - FreeTensorIfUnused(m_BiasTensor); -} - -void ClDepthwiseConvolutionWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthwiseConvolutionWorkload_Execute"); - BOOST_ASSERT(m_DepthwiseConvolutionLayer); - - RunClFunction(*m_DepthwiseConvolutionLayer, CHECK_LOCATION()); -} - -} // namespace armnn diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp deleted file mode 100644 index fc277b9947..0000000000 --- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp +++ /dev/null @@ 
-1,42 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/IFunction.h> -#include <arm_compute/core/Error.h> -#include <arm_compute/runtime/CL/CLTensor.h> - -namespace armnn -{ - -arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional<TensorInfo>& biases); - -class ClDepthwiseConvolutionWorkload : public BaseWorkload<DepthwiseConvolution2dQueueDescriptor> -{ -public: - using BaseWorkload<DepthwiseConvolution2dQueueDescriptor>::m_Data; - - ClDepthwiseConvolutionWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info); - - void Execute() const override; - -protected: - std::unique_ptr<arm_compute::IFunction> m_DepthwiseConvolutionLayer; - - std::unique_ptr<arm_compute::CLTensor> m_KernelTensor; - std::unique_ptr<arm_compute::CLTensor> m_BiasTensor; - - void FreeUnusedTensors(); -}; - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClDequantizeWorkload.cpp b/src/backends/cl/workloads/ClDequantizeWorkload.cpp deleted file mode 100644 index eca795de7e..0000000000 --- a/src/backends/cl/workloads/ClDequantizeWorkload.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClDequantizeWorkload.hpp" -#include "ClWorkloadUtils.hpp" - -#include <aclCommon/ArmComputeTensorUtils.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> - -#include <arm_compute/core/Types.h> - -#include <cl/ClLayerSupport.hpp> -#include <cl/ClTensorHandle.hpp> - -#include <boost/polymorphic_pointer_cast.hpp> - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status ClDequantizeWorkloadValidate(const TensorInfo& input, const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - return arm_compute::CLDequantizationLayer::validate(&aclInputInfo, &aclOutputInfo); -} - -ClDequantizeWorkload::ClDequantizeWorkload(const DequantizeQueueDescriptor& descriptor, - const WorkloadInfo& workloadInfo) - : BaseWorkload<DequantizeQueueDescriptor>(descriptor, workloadInfo) -{ - m_Data.ValidateInputsOutputs("ClDequantizeWorkload", 1, 1); - - arm_compute::ICLTensor& input = boost::polymorphic_pointer_downcast<IClTensorHandle>( - m_Data.m_Inputs[0])->GetTensor(); - - arm_compute::ICLTensor& output = boost::polymorphic_pointer_downcast<IClTensorHandle>( - m_Data.m_Outputs[0])->GetTensor(); - - m_Layer.reset(new arm_compute::CLDequantizationLayer()); - m_Layer->configure(&input, &output); - m_Layer->prepare(); -} - -void ClDequantizeWorkload::Execute() const -{ - if (m_Layer) - { - ARMNN_SCOPED_PROFILING_EVENT_CL("ClDequantizeWorkload_Execute"); - m_Layer->run(); - } -} - -} // namespace armnn diff --git a/src/backends/cl/workloads/ClDequantizeWorkload.hpp b/src/backends/cl/workloads/ClDequantizeWorkload.hpp deleted file mode 100644 index 6e61da2ebf..0000000000 --- a/src/backends/cl/workloads/ClDequantizeWorkload.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLDequantizationLayer.h> - -namespace armnn -{ - -arm_compute::Status ClDequantizeWorkloadValidate(const TensorInfo& input, const TensorInfo& output); - -class ClDequantizeWorkload : public BaseWorkload<DequantizeQueueDescriptor> -{ -public: - ClDequantizeWorkload(const DequantizeQueueDescriptor& descriptor, const WorkloadInfo& workloadInfo); - - void Execute() const override; - -private: - mutable std::unique_ptr<arm_compute::CLDequantizationLayer> m_Layer; -}; - -} // namespace armnn
\ No newline at end of file diff --git a/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp b/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp deleted file mode 100644 index 2a27f8a9bc..0000000000 --- a/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClDivisionFloatWorkload.hpp" -#include <cl/ClTensorHandle.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); - const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); - const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - return arm_compute::CLArithmeticDivision::validate(&aclInput0, &aclInput1, &aclOutput); -} - - -ClDivisionFloatWorkload::ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload<DivisionQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClDivisionFloatWorkload", 2, 1); - - arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - // Construct - m_ArithmeticDivision.configure(&input0, &input1, &output); -} - -void ClDivisionFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClDivisionFloatWorkload_Execute"); - RunClFunction(m_ArithmeticDivision, CHECK_LOCATION()); -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp b/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp deleted file mode 100644 index ddca87d78a..0000000000 --- a/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp +++ /dev/null @@ -1,32 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLElementwiseOperations.h> - -namespace armnn -{ - -arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output); - -class ClDivisionFloatWorkload : public FloatWorkload<DivisionQueueDescriptor> -{ -public: - ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info); - - using FloatWorkload<DivisionQueueDescriptor>::FloatWorkload; - void Execute() const override; - -private: - mutable arm_compute::CLArithmeticDivision m_ArithmeticDivision; -}; - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp deleted file mode 100644 index f38342ed39..0000000000 --- a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp +++ /dev/null @@ -1,40 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT -// - -#include "ClFloorFloatWorkload.hpp" -#include <cl/ClTensorHandle.hpp> - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -arm_compute::Status ClFloorWorkloadValidate(const TensorInfo& input, - const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - return arm_compute::CLFloor::validate(&aclInput, &aclOutput); -} - -ClFloorFloatWorkload::ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info) - : FloatWorkload<FloorQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClFloorFloatWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_Layer.configure(&input, &output); -} - -void ClFloorFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClFloorFloatWorkload_Execute"); - RunClFunction(m_Layer, CHECK_LOCATION()); -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.hpp b/src/backends/cl/workloads/ClFloorFloatWorkload.hpp deleted file mode 100644 index 1ddaddff0b..0000000000 --- a/src/backends/cl/workloads/ClFloorFloatWorkload.hpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLFloor.h> - -namespace armnn -{ - -arm_compute::Status ClFloorWorkloadValidate(const TensorInfo& input, - const TensorInfo& output); - -class ClFloorFloatWorkload : public FloatWorkload<FloorQueueDescriptor> -{ -public: - ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - mutable arm_compute::CLFloor m_Layer; -}; - -} //namespace armnn - - - - diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp deleted file mode 100644 index 60eb138b42..0000000000 --- a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp +++ /dev/null @@ -1,96 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClFullyConnectedWorkload.hpp" -#include <cl/ClTensorHandle.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> -#include <aclCommon/ArmComputeUtils.hpp> -#include <cl/ClLayerSupport.hpp> - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& weights, - const TensorInfo& biases, - const FullyConnectedDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output); - const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights); - - arm_compute::TensorInfo aclBiases; - arm_compute::TensorInfo *optionalAclBiases = nullptr; - if (descriptor.m_BiasEnabled) - { - aclBiases = BuildArmComputeTensorInfo(biases); - optionalAclBiases = &aclBiases; - } - - const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo = - ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor); - - return arm_compute::CLFullyConnectedLayer::validate(&aclInput, - &aclWeights, - optionalAclBiases, - &aclOutput, - fullyConnectedLayerInfo); -} - -ClFullyConnectedWorkload::ClFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor, - const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) - : BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info) - , m_FullyConnectedLayer(memoryManager) -{ - m_WeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo()); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - m_BiasesTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo()); - } - - m_Data.ValidateInputsOutputs("ClFullyConnectedWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - // Construct - arm_compute::FullyConnectedLayerInfo fc_info; - fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix; - m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info); - - InitializeArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight); - - if (m_BiasesTensor) - { - InitializeArmComputeClTensorData(*m_BiasesTensor, m_Data.m_Bias); - } - - // Force Compute Library to perform the necessary copying and reshaping, after which - // delete all the input tensors that will no longer be needed - m_FullyConnectedLayer.prepare(); - FreeUnusedTensors(); -} - -void ClFullyConnectedWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClFullyConnectedWorkload_Execute"); - RunClFunction(m_FullyConnectedLayer, CHECK_LOCATION()); -} - -void ClFullyConnectedWorkload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_WeightsTensor); - FreeTensorIfUnused(m_BiasesTensor); -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp deleted file mode 100644 index e13436eaa5..0000000000 --- a/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp +++ /dev/null @@ -1,43 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h> -#include <arm_compute/runtime/MemoryManagerOnDemand.h> - -#include <memory> - -namespace armnn -{ - -arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& weights, - const TensorInfo& biases, - const FullyConnectedDescriptor& descriptor); - -class ClFullyConnectedWorkload : public armnn::BaseWorkload<armnn::FullyConnectedQueueDescriptor> -{ -public: - ClFullyConnectedWorkload(const armnn::FullyConnectedQueueDescriptor& descriptor, - const armnn::WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); - - using armnn::BaseWorkload<armnn::FullyConnectedQueueDescriptor>::m_Data; - void Execute() const override; - -private: - mutable arm_compute::CLFullyConnectedLayer m_FullyConnectedLayer; - - std::unique_ptr<arm_compute::CLTensor> m_WeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_BiasesTensor; - - void FreeUnusedTensors(); -}; - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClGreaterWorkload.cpp b/src/backends/cl/workloads/ClGreaterWorkload.cpp deleted file mode 100644 index b086122bdc..0000000000 --- a/src/backends/cl/workloads/ClGreaterWorkload.cpp +++ /dev/null @@ -1,65 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClGreaterWorkload.hpp" - -#include "ClWorkloadUtils.hpp" - -#include <aclCommon/ArmComputeUtils.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include <backendsCommon/CpuTensorHandle.hpp> - -#include <cl/ClLayerSupport.hpp> -#include <cl/ClTensorHandle.hpp> - -namespace armnn -{ - -using namespace armcomputetensorutils; - -arm_compute::Status ClGreaterWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0); - const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - const arm_compute::Status aclStatus = arm_compute::CLComparison::validate( - &aclInput0Info, - &aclInput1Info, - &aclOutputInfo, - arm_compute::ComparisonOperation::Greater); - - return aclStatus; -} - -template<DataType T> -ClGreaterWorkload<T>::ClGreaterWorkload(const GreaterQueueDescriptor& descriptor, - const WorkloadInfo& info) - : MultiTypedWorkload<GreaterQueueDescriptor, T, DataType::Boolean>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClGreaterWorkload", 2, 1); - - arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_GreaterLayer.configure(&input0, &input1, &output, arm_compute::ComparisonOperation::Greater); -} - -template<DataType T> -void ClGreaterWorkload<T>::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClGreaterWorkload_Execute"); - RunClFunction(m_GreaterLayer, CHECK_LOCATION()); -} - -template class ClGreaterWorkload<DataType::Float32>; -template class ClGreaterWorkload<DataType::QAsymmU8>; - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClGreaterWorkload.hpp
b/src/backends/cl/workloads/ClGreaterWorkload.hpp deleted file mode 100644 index 862e168334..0000000000 --- a/src/backends/cl/workloads/ClGreaterWorkload.hpp +++ /dev/null @@ -1,34 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLComparison.h> - -namespace armnn -{ - -arm_compute::Status ClGreaterWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output); - -template<DataType T> -class ClGreaterWorkload : public MultiTypedWorkload<GreaterQueueDescriptor, T, DataType::Boolean> -{ -public: - ClGreaterWorkload(const GreaterQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - using MultiTypedWorkload<GreaterQueueDescriptor, T, DataType::Boolean>::m_Data; - mutable arm_compute::CLComparison m_GreaterLayer; -}; - -using ClGreaterFloat32Workload = ClGreaterWorkload<DataType::Float32>; -using ClGreaterUint8Workload = ClGreaterWorkload<DataType::QAsymmU8>; - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp deleted file mode 100644 index 50cf345a7f..0000000000 --- a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp +++ /dev/null @@ -1,59 +0,0 @@ -// -// Copyright © 2019 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClInstanceNormalizationWorkload.hpp" -#include "ClWorkloadUtils.hpp" - -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include <cl/ClTensorHandle.hpp> - -using namespace armnn::armcomputetensorutils; - -namespace armnn -{ - -arm_compute::Status ClInstanceNormalizationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const InstanceNormalizationDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - - return arm_compute::CLInstanceNormalizationLayer::validate(&aclInputInfo, - &aclOutputInfo, - descriptor.m_Gamma, - descriptor.m_Beta, - descriptor.m_Eps); -} - -ClInstanceNormalizationWorkload::ClInstanceNormalizationWorkload( - const InstanceNormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) - : BaseWorkload<InstanceNormalizationQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClInstanceNormalizationWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); - input.info()->set_data_layout(aclDataLayout); - output.info()->set_data_layout(aclDataLayout); - - m_Layer.configure(&input, - &output, - descriptor.m_Parameters.m_Gamma, - descriptor.m_Parameters.m_Beta, - descriptor.m_Parameters.m_Eps); -} - -void ClInstanceNormalizationWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClInstanceNormalizationWorkload_Execute"); - RunClFunction(m_Layer, CHECK_LOCATION()); -} - -} // namespace armnn diff --git a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.hpp b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.hpp deleted file mode 100644 index 0e37bdcc9b..0000000000 ---
a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2019 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h> - -namespace armnn -{ - -arm_compute::Status ClInstanceNormalizationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const InstanceNormalizationDescriptor& descriptor); - -class ClInstanceNormalizationWorkload : public BaseWorkload<InstanceNormalizationQueueDescriptor> -{ -public: - ClInstanceNormalizationWorkload(const InstanceNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLInstanceNormalizationLayer m_Layer; -}; - -} // namespace armnn diff --git a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp deleted file mode 100644 index e1a61277d5..0000000000 --- a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClL2NormalizationFloatWorkload.hpp" -#include <cl/ClTensorHandle.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> -#include <aclCommon/ArmComputeUtils.hpp> - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const L2NormalizationDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - - int axis = (descriptor.m_DataLayout == DataLayout::NCHW) ? 2 : 0; - - return arm_compute::CLL2NormalizeLayer::validate(&aclInput, &aclOutput, axis, descriptor.m_Eps); -} - -ClL2NormalizationFloatWorkload::ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload<L2NormalizationQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClL2NormalizationFloatWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); - input.info()->set_data_layout(aclDataLayout); - output.info()->set_data_layout(aclDataLayout); - - int axis = (m_Data.m_Parameters.m_DataLayout == DataLayout::NCHW) ? 2 : 0; - - m_Layer.configure(&input, &output, axis, m_Data.m_Parameters.m_Eps); -} - -void ClL2NormalizationFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClL2NormalizationFloatWorkload_Execute"); - RunClFunction(m_Layer, CHECK_LOCATION()); -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp deleted file mode 100644 index 26aea9fd88..0000000000 --- a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp +++ /dev/null @@ -1,35 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h> - -namespace armnn -{ - -arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const L2NormalizationDescriptor& descriptor); - -class ClL2NormalizationFloatWorkload : public FloatWorkload<L2NormalizationQueueDescriptor> -{ -public: - ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - // Purposely not a CLL2Normalize function. See constructor. - mutable arm_compute::CLL2NormalizeLayer m_Layer; -}; - -} //namespace armnn - - - - diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp deleted file mode 100644 index 2f3ba75275..0000000000 --- a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp +++ /dev/null @@ -1,435 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClLstmFloatWorkload.hpp" -#include <cl/ClTensorHandle.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> -#include <cl/ClLayerSupport.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include <arm_compute/runtime/CL/functions/CLLSTMLayer.h> - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info) - : FloatWorkload<LstmQueueDescriptor>(descriptor, info) -{ - arm_compute::LSTMParams<arm_compute::ICLTensor> lstm_param; - - // Basic parameters - m_InputToForgetWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights->GetTensorInfo()); - - m_InputToCellWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_InputToCellWeightsTensor, m_Data.m_InputToCellWeights->GetTensorInfo()); - - m_InputToOutputWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_InputToOutputWeightsTensor, m_Data.m_InputToOutputWeights->GetTensorInfo()); - - m_RecurrentToForgetWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_RecurrentToForgetWeightsTensor, m_Data.m_RecurrentToForgetWeights->GetTensorInfo()); - - m_RecurrentToCellWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_RecurrentToCellWeightsTensor, m_Data.m_RecurrentToCellWeights->GetTensorInfo()); - - m_RecurrentToOutputWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_RecurrentToOutputWeightsTensor, m_Data.m_RecurrentToOutputWeights->GetTensorInfo()); - - m_ForgetGateBiasTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_ForgetGateBiasTensor, m_Data.m_ForgetGateBias->GetTensorInfo()); - - m_CellBiasTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_CellBiasTensor, m_Data.m_CellBias->GetTensorInfo()); - - m_OutputGateBiasTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_OutputGateBiasTensor, m_Data.m_OutputGateBias->GetTensorInfo()); - - // for future reference: check the AndroidNN API for the logic here - if (!m_Data.m_Parameters.m_CifgEnabled) - { - m_InputToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_InputToInputWeightsTensor, 
m_Data.m_InputToInputWeights->GetTensorInfo()); - - m_RecurrentToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_RecurrentToInputWeightsTensor, m_Data.m_RecurrentToInputWeights->GetTensorInfo()); - - m_CellToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - if (m_Data.m_CellToInputWeights != nullptr) - { - BuildArmComputeTensor(*m_CellToInputWeightsTensor, m_Data.m_CellToInputWeights->GetTensorInfo()); - } - - m_InputGateBiasTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_InputGateBiasTensor, m_Data.m_InputGateBias->GetTensorInfo()); - - lstm_param.set_cifg_params(m_InputToInputWeightsTensor.get(), - m_RecurrentToInputWeightsTensor.get(), - m_Data.m_CellToInputWeights != nullptr ? m_CellToInputWeightsTensor.get() : nullptr, - m_InputGateBiasTensor.get()); - } - - if (m_Data.m_Parameters.m_ProjectionEnabled) - { - m_ProjectionWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_ProjectionWeightsTensor, m_Data.m_ProjectionWeights->GetTensorInfo()); - - m_ProjectionBiasTensor = std::make_unique<arm_compute::CLTensor>(); - if (m_Data.m_ProjectionBias != nullptr) - { - BuildArmComputeTensor(*m_ProjectionBiasTensor, m_Data.m_ProjectionBias->GetTensorInfo()); - } - - lstm_param.set_projection_params(m_ProjectionWeightsTensor.get(), - m_Data.m_ProjectionBias != nullptr ? m_ProjectionBiasTensor.get() : nullptr); - } - - if (m_Data.m_Parameters.m_PeepholeEnabled) - { - m_CellToForgetWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_CellToForgetWeightsTensor, m_Data.m_CellToForgetWeights->GetTensorInfo()); - - m_CellToOutputWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_CellToOutputWeightsTensor, m_Data.m_CellToOutputWeights->GetTensorInfo()); - - lstm_param.set_peephole_params(m_CellToForgetWeightsTensor.get(), m_CellToOutputWeightsTensor.get()); - } - - if (m_Data.m_Parameters.m_LayerNormEnabled) - { - m_InputLayerNormWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - m_ForgetLayerNormWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - m_CellLayerNormWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - m_OutputLayerNormWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - - if (!m_Data.m_Parameters.m_CifgEnabled) - { - BuildArmComputeTensor(*m_InputLayerNormWeightsTensor, m_Data.m_InputLayerNormWeights->GetTensorInfo()); - } - BuildArmComputeTensor(*m_ForgetLayerNormWeightsTensor, m_Data.m_ForgetLayerNormWeights->GetTensorInfo()); - BuildArmComputeTensor(*m_CellLayerNormWeightsTensor, m_Data.m_CellLayerNormWeights->GetTensorInfo()); - BuildArmComputeTensor(*m_OutputLayerNormWeightsTensor, m_Data.m_OutputLayerNormWeights->GetTensorInfo()); - - lstm_param.set_layer_normalization_params(m_Data.m_Parameters.m_CifgEnabled ? 
nullptr : - m_InputLayerNormWeightsTensor.get(), - m_ForgetLayerNormWeightsTensor.get(), - m_CellLayerNormWeightsTensor.get(), - m_OutputLayerNormWeightsTensor.get()); - } - - const arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - const arm_compute::ICLTensor& output_state_in = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); - const arm_compute::ICLTensor& cell_state_in = static_cast<IClTensorHandle*>(m_Data.m_Inputs[2])->GetTensor(); - - arm_compute::ICLTensor& output_state_out = static_cast<IClTensorHandle*>(m_Data.m_Outputs[1])->GetTensor(); - arm_compute::ICLTensor& cell_state_out = static_cast<IClTensorHandle*>(m_Data.m_Outputs[2])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[3])->GetTensor(); - - // Get the batch_size and the num_units from the cellStateIn dimensions - const TensorInfo& inputTensorInfo = info.m_InputTensorInfos[2]; - const unsigned int batch_size = boost::numeric_cast<unsigned int>(inputTensorInfo.GetShape()[0]); - const unsigned int num_units = boost::numeric_cast<unsigned int>(inputTensorInfo.GetShape()[1]); - - m_ScratchBuffer = std::make_unique<arm_compute::CLTensor>(); - if (m_Data.m_Parameters.m_CifgEnabled) - { - // 2D tensor with dimensions [num_units * 3, batch_size] with CIFG - armnn::TensorInfo scratchBuffer1({ batch_size, num_units * 3 }, DataType::Float32); - BuildArmComputeTensor(*m_ScratchBuffer, scratchBuffer1); - } - else - { - // scratch_buffer [num_units * 4, batch_size] without CIFG - armnn::TensorInfo scratchBuffer2({ batch_size, num_units * 4 }, DataType::Float32); - BuildArmComputeTensor(*m_ScratchBuffer, scratchBuffer2); - } - - float cell_threshold = m_Data.m_Parameters.m_ClippingThresCell; - float projection_threshold = m_Data.m_Parameters.m_ClippingThresProj; - - // for preparing the object for the class ActivationLayerInfo, we need to consider 5 situations - arm_compute::ActivationLayerInfo activationLayerInfo; - if (m_Data.m_Parameters.m_ActivationFunc == 0) - { - // no activation, do nothing - } - else if (m_Data.m_Parameters.m_ActivationFunc == 1) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::RELU); - } - else if (m_Data.m_Parameters.m_ActivationFunc == 3) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0); - } - else if (m_Data.m_Parameters.m_ActivationFunc == 4) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0); - } - else if (m_Data.m_Parameters.m_ActivationFunc == 6) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC); - } - else - { - throw armnn::Exception("Wrong Type of Activation Function!"); - } - - m_LstmLayer.configure(&input, m_InputToForgetWeightsTensor.get(), m_InputToCellWeightsTensor.get(), - m_InputToOutputWeightsTensor.get(), m_RecurrentToForgetWeightsTensor.get(), - m_RecurrentToCellWeightsTensor.get(), m_RecurrentToOutputWeightsTensor.get(), - m_ForgetGateBiasTensor.get(), m_CellBiasTensor.get(), m_OutputGateBiasTensor.get(), - &output_state_in, &cell_state_in, m_ScratchBuffer.get(), &output_state_out, - &cell_state_out, &output, lstm_param, activationLayerInfo, - cell_threshold, projection_threshold); - - armcomputetensorutils::InitialiseArmComputeTensorEmpty(*m_ScratchBuffer); - - 
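// The activation handling above keys off a small integer code (0, 1, 3, 4, 6) that appears to
// follow the TensorFlow Lite / Android NN fused-activation numbering. A compact sketch of the
// same mapping, under that assumption (the helper name is illustrative, not from these sources):
arm_compute::ActivationLayerInfo MakeLstmActivation(uint32_t activationFunc)
{
    using AF = arm_compute::ActivationLayerInfo::ActivationFunction;
    switch (activationFunc)
    {
        case 0: return arm_compute::ActivationLayerInfo();                        // no activation
        case 1: return arm_compute::ActivationLayerInfo(AF::RELU);
        case 3: return arm_compute::ActivationLayerInfo(AF::BOUNDED_RELU, 6.0f);  // ReLU6
        case 4: return arm_compute::ActivationLayerInfo(AF::TANH, 1.0f, 1.0f);
        case 6: return arm_compute::ActivationLayerInfo(AF::LOGISTIC);            // sigmoid
        default: throw armnn::Exception("Wrong Type of Activation Function!");
    }
}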
InitializeArmComputeClTensorData(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights); - InitializeArmComputeClTensorData(*m_InputToCellWeightsTensor, m_Data.m_InputToCellWeights); - InitializeArmComputeClTensorData(*m_InputToOutputWeightsTensor, m_Data.m_InputToOutputWeights); - InitializeArmComputeClTensorData(*m_RecurrentToForgetWeightsTensor, m_Data.m_RecurrentToForgetWeights); - InitializeArmComputeClTensorData(*m_RecurrentToCellWeightsTensor, m_Data.m_RecurrentToCellWeights); - InitializeArmComputeClTensorData(*m_RecurrentToOutputWeightsTensor, m_Data.m_RecurrentToOutputWeights); - InitializeArmComputeClTensorData(*m_ForgetGateBiasTensor, m_Data.m_ForgetGateBias); - InitializeArmComputeClTensorData(*m_CellBiasTensor, m_Data.m_CellBias); - InitializeArmComputeClTensorData(*m_OutputGateBiasTensor, m_Data.m_OutputGateBias); - - if (!m_Data.m_Parameters.m_CifgEnabled) - { - InitializeArmComputeClTensorData(*m_InputToInputWeightsTensor, m_Data.m_InputToInputWeights); - InitializeArmComputeClTensorData(*m_RecurrentToInputWeightsTensor, m_Data.m_RecurrentToInputWeights); - if (m_Data.m_CellToInputWeights != nullptr) - { - InitializeArmComputeClTensorData(*m_CellToInputWeightsTensor, m_Data.m_CellToInputWeights); - } - InitializeArmComputeClTensorData(*m_InputGateBiasTensor, m_Data.m_InputGateBias); - } - - if (m_Data.m_Parameters.m_ProjectionEnabled) - { - InitializeArmComputeClTensorData(*m_ProjectionWeightsTensor, m_Data.m_ProjectionWeights); - if (m_Data.m_ProjectionBias != nullptr) - { - InitializeArmComputeClTensorData(*m_ProjectionBiasTensor, m_Data.m_ProjectionBias); - } - } - - if (m_Data.m_Parameters.m_PeepholeEnabled) - { - InitializeArmComputeClTensorData(*m_CellToForgetWeightsTensor, m_Data.m_CellToForgetWeights); - InitializeArmComputeClTensorData(*m_CellToOutputWeightsTensor, m_Data.m_CellToOutputWeights); - } - - if (m_Data.m_Parameters.m_LayerNormEnabled) - { - if (!m_Data.m_Parameters.m_CifgEnabled) - { - InitializeArmComputeClTensorData(*m_InputLayerNormWeightsTensor, m_Data.m_InputLayerNormWeights); - } - - InitializeArmComputeClTensorData(*m_ForgetLayerNormWeightsTensor, m_Data.m_ForgetLayerNormWeights); - InitializeArmComputeClTensorData(*m_CellLayerNormWeightsTensor, m_Data.m_CellLayerNormWeights); - InitializeArmComputeClTensorData(*m_OutputLayerNormWeightsTensor, m_Data.m_OutputLayerNormWeights); - } - - // Force Compute Library to perform the necessary copying and reshaping, after which - // delete all the input tensors that will no longer be needed - m_LstmLayer.prepare(); - FreeUnusedTensors(); -} - -void ClLstmFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClLstmFloatWorkload_Execute"); - RunClFunction(m_LstmLayer, CHECK_LOCATION()); -} - -arm_compute::Status ClLstmFloatWorkloadValidate(const TensorInfo& input, const TensorInfo& outputStateIn, - const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, - const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, - const TensorInfo& output, const LstmDescriptor& descriptor, - const LstmInputParamsInfo& paramsInfo) -{ - arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info; - - // The inputs and the outputs - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn); - const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn); - const arm_compute::TensorInfo aclScratchBufferInfo = 
BuildArmComputeTensorInfo(scratchBuffer); - const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut); - const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - // Basic parameters - const arm_compute::TensorInfo aclInputToForgetWeightsInfo - = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights()); - const arm_compute::TensorInfo aclInputToCellWeightsInfo - = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights()); - const arm_compute::TensorInfo aclInputToOutputWeightsInfo - = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights()); - const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo - = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights()); - const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo - = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights()); - const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo - = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights()); - const arm_compute::TensorInfo aclForgetGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias()); - const arm_compute::TensorInfo aclCellBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellBias()); - const arm_compute::TensorInfo aclOutputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias()); - - arm_compute::TensorInfo aclInputToInputWeightsInfo; - arm_compute::TensorInfo aclRecurrentToInputWeightsInfo; - arm_compute::TensorInfo aclCellToInputWeightsInfo; - arm_compute::TensorInfo aclInputGateBiasInfo; - arm_compute::TensorInfo aclProjectionWeightsInfo; - arm_compute::TensorInfo aclProjectionBiasInfo; - arm_compute::TensorInfo aclCellToForgetWeightsInfo; - arm_compute::TensorInfo aclCellToOutputWeightsInfo; - arm_compute::TensorInfo aclInputLayerNormWeightsInfo; - arm_compute::TensorInfo aclForgetLayerNormWeightsInfo; - arm_compute::TensorInfo aclCellLayerNormWeightsInfo; - arm_compute::TensorInfo aclOutputLayerNormWeightsInfo; - - if (!descriptor.m_CifgEnabled) - { - aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights()); - aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights()); - - if (paramsInfo.m_CellToInputWeights != nullptr) - { - aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights()); - } - aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias()); - lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo, &aclRecurrentToInputWeightsInfo, - paramsInfo.m_CellToInputWeights != nullptr ? - &aclCellToInputWeightsInfo: nullptr, - &aclInputGateBiasInfo); - } - - if (descriptor.m_ProjectionEnabled) - { - aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights()); - - if (paramsInfo.m_ProjectionBias != nullptr) - { - aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias()); - } - lstm_params_info.set_projection_params(&aclProjectionWeightsInfo, - paramsInfo.m_ProjectionBias != nullptr ?
- &aclProjectionBiasInfo: nullptr); - } - - if (descriptor.m_PeepholeEnabled) - { - aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights()); - aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights()); - lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo); - } - - float cell_threshold = descriptor.m_ClippingThresCell; - float projection_threshold = descriptor.m_ClippingThresProj; - - // for preparing the object for the class ActivationLayerInfo, we need to consider 5 situations - arm_compute::ActivationLayerInfo activationLayerInfo; - if (descriptor.m_ActivationFunc == 0) - { - // no activation, do nothing - } - else if (descriptor.m_ActivationFunc == 1) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::RELU); - } - else if (descriptor.m_ActivationFunc == 3) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0); - } - else if (descriptor.m_ActivationFunc == 4) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0); - } - else if (descriptor.m_ActivationFunc == 6) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC); - } - else - { - throw armnn::Exception("Wrong Type of Activation Function!"); - } - - if (descriptor.m_LayerNormEnabled) - { - if (!descriptor.m_CifgEnabled) - { - aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights()); - } - - aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights()); - - aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights()); - - aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights()); - - lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ? 
- nullptr : &aclInputLayerNormWeightsInfo, - &aclForgetLayerNormWeightsInfo, - &aclCellLayerNormWeightsInfo, - &aclOutputLayerNormWeightsInfo); - } - - return arm_compute::CLLSTMLayer::validate(&aclInputInfo, &aclInputToForgetWeightsInfo, - &aclInputToCellWeightsInfo, - &aclInputToOutputWeightsInfo, - &aclRecurrentToForgetWeightsInfo, - &aclRecurrentToCellWeightsInfo, - &aclRecurrentToOutputWeightsInfo, - &aclForgetGateBiasInfo, - &aclCellBiasInfo, - &aclOutputGateBiasInfo, - &aclOutputStateInInfo, &aclCellStateInInfo, - &aclScratchBufferInfo, &aclOutputStateOutInfo, - &aclCellStateOutInfo, &aclOutputInfo, - lstm_params_info, activationLayerInfo, - cell_threshold, projection_threshold); -} - -void ClLstmFloatWorkload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_InputToInputWeightsTensor); - FreeTensorIfUnused(m_InputToForgetWeightsTensor); - FreeTensorIfUnused(m_InputToCellWeightsTensor); - FreeTensorIfUnused(m_InputToOutputWeightsTensor); - FreeTensorIfUnused(m_RecurrentToInputWeightsTensor); - FreeTensorIfUnused(m_RecurrentToForgetWeightsTensor); - FreeTensorIfUnused(m_RecurrentToCellWeightsTensor); - FreeTensorIfUnused(m_RecurrentToOutputWeightsTensor); - FreeTensorIfUnused(m_CellToInputWeightsTensor); - FreeTensorIfUnused(m_CellToForgetWeightsTensor); - FreeTensorIfUnused(m_CellToOutputWeightsTensor); - FreeTensorIfUnused(m_InputGateBiasTensor); - FreeTensorIfUnused(m_ForgetGateBiasTensor); - FreeTensorIfUnused(m_CellBiasTensor); - FreeTensorIfUnused(m_OutputGateBiasTensor); - FreeTensorIfUnused(m_ProjectionWeightsTensor); - FreeTensorIfUnused(m_ProjectionBiasTensor); - FreeTensorIfUnused(m_ScratchBuffer); - FreeTensorIfUnused(m_InputLayerNormWeightsTensor); - FreeTensorIfUnused(m_ForgetLayerNormWeightsTensor); - FreeTensorIfUnused(m_CellLayerNormWeightsTensor); - FreeTensorIfUnused(m_OutputLayerNormWeightsTensor); -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.hpp b/src/backends/cl/workloads/ClLstmFloatWorkload.hpp deleted file mode 100644 index b7cb408b40..0000000000 --- a/src/backends/cl/workloads/ClLstmFloatWorkload.hpp +++ /dev/null @@ -1,59 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <armnn/Descriptors.hpp> -#include <armnn/LstmParams.hpp> -#include <backendsCommon/Workload.hpp> -#include <backendsCommon/WorkloadData.hpp> - -#include <arm_compute/runtime/CL/functions/CLLSTMLayer.h> - -namespace armnn -{ - -class ClLstmFloatWorkload : public FloatWorkload<LstmQueueDescriptor> -{ -public: - ClLstmFloatWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLLSTMLayer m_LstmLayer; - - std::unique_ptr<arm_compute::CLTensor> m_InputToInputWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_InputToForgetWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_InputToCellWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_InputToOutputWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_RecurrentToInputWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_RecurrentToForgetWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_RecurrentToCellWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_RecurrentToOutputWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_CellToInputWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_CellToForgetWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_CellToOutputWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_InputGateBiasTensor; - std::unique_ptr<arm_compute::CLTensor> m_ForgetGateBiasTensor; - std::unique_ptr<arm_compute::CLTensor> m_CellBiasTensor; - std::unique_ptr<arm_compute::CLTensor> m_OutputGateBiasTensor; - std::unique_ptr<arm_compute::CLTensor> m_ProjectionWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_ProjectionBiasTensor; - std::unique_ptr<arm_compute::CLTensor> m_InputLayerNormWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_ForgetLayerNormWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_CellLayerNormWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_OutputLayerNormWeightsTensor; - - std::unique_ptr<arm_compute::CLTensor> m_ScratchBuffer; - - void FreeUnusedTensors(); -}; - -arm_compute::Status ClLstmFloatWorkloadValidate(const TensorInfo& input, const TensorInfo& outputStateIn, - const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, - const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, - const TensorInfo& output, const LstmDescriptor &descriptor, - const LstmInputParamsInfo& paramsInfo); -} //namespace armnn diff --git a/src/backends/cl/workloads/ClMaximumWorkload.cpp b/src/backends/cl/workloads/ClMaximumWorkload.cpp deleted file mode 100644 index cd3192d186..0000000000 --- a/src/backends/cl/workloads/ClMaximumWorkload.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClMaximumWorkload.hpp" - -#include "ClWorkloadUtils.hpp" - -#include <aclCommon/ArmComputeUtils.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include <backendsCommon/CpuTensorHandle.hpp> - -#include <cl/ClLayerSupport.hpp> -#include <cl/ClTensorHandle.hpp> -#include <cl/ClLayerSupport.hpp> - -namespace armnn -{ - -using namespace armcomputetensorutils; - -arm_compute::Status ClMaximumWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0); - const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - const arm_compute::Status aclStatus = arm_compute::CLElementwiseMax::validate(&aclInput0Info, - &aclInput1Info, - &aclOutputInfo); - - return aclStatus; -} - -ClMaximumWorkload::ClMaximumWorkload(const MaximumQueueDescriptor& descriptor, - const WorkloadInfo& info) - : BaseWorkload<MaximumQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClMaximumWorkload", 2, 1); - - arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_MaximumLayer.configure(&input0, &input1, &output); -} - -void ClMaximumWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClMaximumWorkload_Execute"); - RunClFunction(m_MaximumLayer, CHECK_LOCATION()); -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClMaximumWorkload.hpp b/src/backends/cl/workloads/ClMaximumWorkload.hpp deleted file mode 100644 index 18f67cddf6..0000000000 --- a/src/backends/cl/workloads/ClMaximumWorkload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLElementwiseOperations.h> - -namespace armnn -{ - -arm_compute::Status ClMaximumWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output); - -class ClMaximumWorkload : public BaseWorkload<MaximumQueueDescriptor> -{ -public: - ClMaximumWorkload(const MaximumQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLElementwiseMax m_MaximumLayer; -}; - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClMeanWorkload.cpp b/src/backends/cl/workloads/ClMeanWorkload.cpp deleted file mode 100644 index 470b6a883d..0000000000 --- a/src/backends/cl/workloads/ClMeanWorkload.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
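// ClMaximumWorkload above shows the two-phase contract used by every workload in this
// backend: a free Validate() function the optimizer can call before any OpenCL resources
// exist, and a constructor that configure()s the ACL function against live tensor
// handles. A hedged usage sketch; IsMaximumSupportedOnClSketch is an illustrative helper
// name, not an armnn API:

#include <arm_compute/core/Error.h>
#include "ClMaximumWorkload.hpp" // declares ClMaximumWorkloadValidate (see above)

bool IsMaximumSupportedOnClSketch(const armnn::TensorInfo& input0,
                                  const armnn::TensorInfo& input1,
                                  const armnn::TensorInfo& output)
{
    // Validation works purely on tensor metadata; no kernels are compiled yet.
    arm_compute::Status status = armnn::ClMaximumWorkloadValidate(input0, input1, output);
    return status.error_code() == arm_compute::ErrorCode::OK;
}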
-// SPDX-License-Identifier: MIT -// - -#include "ClMeanWorkload.hpp" - -#include <cl/ClTensorHandle.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status ClMeanValidate(const TensorInfo& input, - const TensorInfo& output, - const MeanDescriptor& desc) -{ - const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(), - input.GetNumDimensions(), - desc.m_Axis); - - return arm_compute::CLReduceMean::validate(&aclInputInfo, coords, desc.m_KeepDims, &aclOutputInfo); -} - -ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info) - : BaseWorkload<MeanQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClMeanWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(input.info()->num_dimensions(), - info.m_InputTensorInfos[0].GetNumDimensions(), - m_Data.m_Parameters.m_Axis); - - m_Layer.configure(&input, coords, m_Data.m_Parameters.m_KeepDims, &output); -} - -void ClMeanWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClMeanWorkload_Execute"); - m_Layer.run(); -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClMeanWorkload.hpp b/src/backends/cl/workloads/ClMeanWorkload.hpp deleted file mode 100644 index 127c0548b1..0000000000 --- a/src/backends/cl/workloads/ClMeanWorkload.hpp +++ /dev/null @@ -1,31 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLReduceMean.h> - -namespace armnn -{ - -arm_compute::Status ClMeanValidate(const TensorInfo& input, - const TensorInfo& output, - const MeanDescriptor& desc); - -class ClMeanWorkload : public BaseWorkload<MeanQueueDescriptor> -{ -public: - ClMeanWorkload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - // Not using CLMeanStdDev, as 4D input tensor support for Mean has been added to a new function called CLReduceMean. - mutable arm_compute::CLReduceMean m_Layer; -}; - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClMinimumWorkload.cpp b/src/backends/cl/workloads/ClMinimumWorkload.cpp deleted file mode 100644 index 5f8dfdb8eb..0000000000 --- a/src/backends/cl/workloads/ClMinimumWorkload.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
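// BuildArmComputeReductionCoordinates, used by the mean workload above, bridges an axis
// convention mismatch: armnn counts dimensions from the outermost, while ACL coordinates
// index from the innermost. A simplified sketch of the remapping, assuming both sides
// agree on rank (the real helper also handles rank differences and an empty axis list):

#include <arm_compute/core/Coordinates.h>
#include <vector>

arm_compute::Coordinates BuildReductionCoordinatesSketch(unsigned int rank,
                                                         const std::vector<unsigned int>& armnnAxes)
{
    arm_compute::Coordinates coords;
    int outIndex = 0;
    for (unsigned int axis : armnnAxes)
    {
        // Mirror each axis: armnn axis 0 (outermost) becomes ACL axis rank - 1.
        coords.set(outIndex++, static_cast<int>(rank - 1 - axis));
    }
    return coords;
}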
-// SPDX-License-Identifier: MIT -// - -#include "ClMinimumWorkload.hpp" - -#include "ClWorkloadUtils.hpp" - -#include <aclCommon/ArmComputeUtils.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include <backendsCommon/CpuTensorHandle.hpp> - -#include <cl/ClLayerSupport.hpp> -#include <cl/ClTensorHandle.hpp> -#include <cl/ClLayerSupport.hpp> - -namespace armnn -{ - -using namespace armcomputetensorutils; - -arm_compute::Status ClMinimumWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0); - const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - const arm_compute::Status aclStatus = arm_compute::CLElementwiseMin::validate(&aclInput0Info, - &aclInput1Info, - &aclOutputInfo); - - return aclStatus; -} - -ClMinimumWorkload::ClMinimumWorkload(const MinimumQueueDescriptor& descriptor, - const WorkloadInfo& info) - : BaseWorkload<MinimumQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClMinimumWorkload", 2, 1); - - arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_MinimumLayer.configure(&input0, &input1, &output); -} - -void ClMinimumWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClMinimumWorkload_Execute"); - RunClFunction(m_MinimumLayer, CHECK_LOCATION()); -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClMinimumWorkload.hpp b/src/backends/cl/workloads/ClMinimumWorkload.hpp deleted file mode 100644 index 55d7eea9dd..0000000000 --- a/src/backends/cl/workloads/ClMinimumWorkload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLElementwiseOperations.h> - -namespace armnn -{ - -arm_compute::Status ClMinimumWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output); - -class ClMinimumWorkload : public BaseWorkload<MinimumQueueDescriptor> -{ -public: - ClMinimumWorkload(const MinimumQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLElementwiseMin m_MinimumLayer; -}; - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp deleted file mode 100644 index f4e7b836ed..0000000000 --- a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClMultiplicationWorkload.hpp" -#include <cl/ClTensorHandle.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); - const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); - const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it, - // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be - // ignored for F32 tensors. - return arm_compute::CLPixelWiseMultiplication::validate(&aclInput1, - &aclInput2, - &aclOutput, - 1.0f, - arm_compute::ConvertPolicy::SATURATE, - arm_compute::RoundingPolicy::TO_ZERO); -} - - -ClMultiplicationWorkload::ClMultiplicationWorkload(const MultiplicationQueueDescriptor& descriptor, - const WorkloadInfo& info) - : BaseWorkload<MultiplicationQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClMultiplicationWorkload", 2, 1); - - arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - // Construct - m_PixelWiseMultiplication.configure(&input0, - &input1, - &output, - 1.0f, - arm_compute::ConvertPolicy::SATURATE, - arm_compute::RoundingPolicy::TO_NEAREST_EVEN); -} - -void ClMultiplicationWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClMultiplicationWorkload_Execute"); - RunClFunction(m_PixelWiseMultiplication, CHECK_LOCATION()); -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClMultiplicationWorkload.hpp b/src/backends/cl/workloads/ClMultiplicationWorkload.hpp deleted file mode 100644 index 732bb16dcc..0000000000 --- a/src/backends/cl/workloads/ClMultiplicationWorkload.hpp +++ /dev/null @@ -1,34 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h> - -namespace armnn -{ - -arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output); - -class ClMultiplicationWorkload : public BaseWorkload<MultiplicationQueueDescriptor> -{ -public: - ClMultiplicationWorkload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info); - - using BaseWorkload<MultiplicationQueueDescriptor>::BaseWorkload; - void Execute() const override; - -private: - mutable arm_compute::CLPixelWiseMultiplication m_PixelWiseMultiplication; -}; - -} //namespace armnn - - - diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp deleted file mode 100644 index 5f2fd7ab83..0000000000 --- a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp +++ /dev/null @@ -1,55 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
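// A worked note on the rounding-policy comment above: rounding only takes effect when the
// product must be written back as an integer (quantized tensors); for F32 outputs it is
// ignored, which is why validate() can demand TO_ZERO while configure() passes
// TO_NEAREST_EVEN without changing float results. For integer write-back the two differ:
//   rescaled product 1.5 : TO_ZERO -> 1, TO_NEAREST_EVEN -> 2
//   rescaled product 2.5 : TO_ZERO -> 2, TO_NEAREST_EVEN -> 2 (ties round to even)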
-// SPDX-License-Identifier: MIT -// - -#include "ClNormalizationFloatWorkload.hpp" -#include <cl/ClTensorHandle.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> -#include <cl/ClLayerSupport.hpp> -#include <aclCommon/ArmComputeUtils.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> -#include "ClWorkloadUtils.hpp" - -using namespace armnn::armcomputetensorutils; - -namespace armnn -{ - -arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const NormalizationDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - - arm_compute::NormalizationLayerInfo layerInfo = BuildArmComputeNormalizationLayerInfo(descriptor); - - return arm_compute::CLNormalizationLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo); -} - -ClNormalizationFloatWorkload::ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload<NormalizationQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClNormalizationFloatWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); - input.info()->set_data_layout(aclDataLayout); - output.info()->set_data_layout(aclDataLayout); - - arm_compute::NormalizationLayerInfo normalizationInfo = BuildArmComputeNormalizationLayerInfo(m_Data.m_Parameters); - - m_NormalizationLayer.configure(&input, &output, normalizationInfo); -}; - -void ClNormalizationFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClNormalizationFloatWorkload_Execute"); - RunClFunction(m_NormalizationLayer, CHECK_LOCATION()); -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp deleted file mode 100644 index a6d4f25e5e..0000000000 --- a/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLNormalizationLayer.h> - -namespace armnn -{ - -arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const NormalizationDescriptor& descriptor); - -class ClNormalizationFloatWorkload : public FloatWorkload<NormalizationQueueDescriptor> -{ -public: - ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLNormalizationLayer m_NormalizationLayer; -}; - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClPadWorkload.cpp b/src/backends/cl/workloads/ClPadWorkload.cpp deleted file mode 100644 index 8a8c34a212..0000000000 --- a/src/backends/cl/workloads/ClPadWorkload.cpp +++ /dev/null @@ -1,67 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
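// The normalization workload above stamps the armnn data layout onto both ACL tensor
// infos before configure(); this is what switches ACL between NCHW and NHWC kernels, and
// the same idiom recurs in the pooling and resize workloads below. A minimal sketch of
// the conversion, assuming only the two layouts armnn defines (the real ConvertDataLayout
// lives in aclCommon/ArmComputeTensorUtils):

#include <arm_compute/core/Types.h>
#include <armnn/Types.hpp>

arm_compute::DataLayout ConvertDataLayoutSketch(armnn::DataLayout layout)
{
    return layout == armnn::DataLayout::NHWC ? arm_compute::DataLayout::NHWC
                                             : arm_compute::DataLayout::NCHW;
}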
-// SPDX-License-Identifier: MIT -// - -#include "ClPadWorkload.hpp" - -#include <cl/ClTensorHandle.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> -#include <arm_compute/core/Types.h> - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -ClPadWorkload::ClPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info) - : BaseWorkload<PadQueueDescriptor>(descriptor, info) -{ - this->m_Data.ValidateInputsOutputs("ClPadWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor(); - - std::vector<std::pair<unsigned int, unsigned int>> reversed_PadList(descriptor.m_Parameters.m_PadList.size()); - - std::reverse_copy(std::begin(descriptor.m_Parameters.m_PadList), - std::end(descriptor.m_Parameters.m_PadList), - std::begin(reversed_PadList)); - - arm_compute::PaddingList padList = static_cast<arm_compute::PaddingList>(reversed_PadList); - - arm_compute::PixelValue pixelValue = GetPixelValue(input, descriptor.m_Parameters.m_PadValue); - - m_Layer.configure(&input, &output, padList, pixelValue); -} - -void ClPadWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClPadWorkload_Execute"); - RunClFunction(m_Layer, CHECK_LOCATION()); -} - -arm_compute::Status ClPadValidate(const TensorInfo& input, - const TensorInfo& output, - const PadDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - std::vector<std::pair<unsigned int, unsigned int>> reversed_PadList(descriptor.m_PadList.size()); - - std::reverse_copy(std::begin(descriptor.m_PadList), - std::end(descriptor.m_PadList), - std::begin(reversed_PadList)); - - arm_compute::PaddingList padList = static_cast<arm_compute::PaddingList>(reversed_PadList); - - const arm_compute::Status aclStatus = arm_compute::CLPadLayer::validate(&aclInputInfo, - &aclOutputInfo, - padList); - - return aclStatus; -} - -} // namespace armnn diff --git a/src/backends/cl/workloads/ClPadWorkload.hpp b/src/backends/cl/workloads/ClPadWorkload.hpp deleted file mode 100644 index e87cbb71c2..0000000000 --- a/src/backends/cl/workloads/ClPadWorkload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/WorkloadData.hpp> -#include <backendsCommon/Workload.hpp> -#include <arm_compute/runtime/CL/functions/CLPadLayer.h> - -namespace armnn { - -class ClPadWorkload : public BaseWorkload<PadQueueDescriptor> -{ -public: - ClPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - mutable arm_compute::CLPadLayer m_Layer; -}; - -arm_compute::Status ClPadValidate(const TensorInfo& input, - const TensorInfo& output, - const PadDescriptor& descriptor); - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClPermuteWorkload.cpp b/src/backends/cl/workloads/ClPermuteWorkload.cpp deleted file mode 100644 index 41bce1d4fa..0000000000 --- a/src/backends/cl/workloads/ClPermuteWorkload.cpp +++ /dev/null @@ -1,51 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
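// The std::reverse_copy in the pad workload above exists because armnn orders m_PadList
// from the outermost dimension inwards, while arm_compute::PaddingList expects the
// innermost dimension first. A self-contained illustration with made-up pad values:

#include <algorithm>
#include <utility>
#include <vector>

std::vector<std::pair<unsigned int, unsigned int>> ReversePadListSketch()
{
    const std::vector<std::pair<unsigned int, unsigned int>> armnnPadList = {{0,0}, {1,1}, {2,2}, {3,3}};
    std::vector<std::pair<unsigned int, unsigned int>> aclPadList(armnnPadList.size());
    std::reverse_copy(armnnPadList.begin(), armnnPadList.end(), aclPadList.begin());
    return aclPadList; // {{3,3}, {2,2}, {1,1}, {0,0}}: innermost dimension now first
}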
-// SPDX-License-Identifier: MIT -// - -#include "ClPermuteWorkload.hpp" -#include <cl/ClTensorHandle.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include <arm_compute/core/Error.h> - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -arm_compute::Status ClPermuteWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const PermuteDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); - const armnn::PermutationVector& mappings = descriptor.m_DimMappings; - - return arm_compute::CLPermute::validate(&aclInputInfo, &aclOutputInfo, - armcomputetensorutils::BuildArmComputePermutationVector(mappings)); -} - -ClPermuteWorkload::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, - const WorkloadInfo& info) - : BaseWorkload<PermuteQueueDescriptor>(descriptor, info) -{ - using armcomputetensorutils::BuildArmComputePermutationVector; - - m_Data.ValidateInputsOutputs(GetName(), 1, 1); - - const arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings; - - // Run the layer. - m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings)); -} - -void ClPermuteWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL( GetName() + "_Execute"); - RunClFunction(m_PermuteFunction, CHECK_LOCATION()); -} - -} // namespace armnn diff --git a/src/backends/cl/workloads/ClPermuteWorkload.hpp b/src/backends/cl/workloads/ClPermuteWorkload.hpp deleted file mode 100644 index 8b5f4c6147..0000000000 --- a/src/backends/cl/workloads/ClPermuteWorkload.hpp +++ /dev/null @@ -1,40 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> -#include <backendsCommon/WorkloadData.hpp> - -#include <armnn/TypesUtils.hpp> -#include <arm_compute/runtime/CL/functions/CLPermute.h> - -#include <string> - -namespace armnn -{ - -arm_compute::Status ClPermuteWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const PermuteDescriptor& descriptor); - -class ClPermuteWorkload : public BaseWorkload<PermuteQueueDescriptor> -{ -public: - static const std::string& GetName() - { - static const std::string name = std::string("ClPermuteWorkload"); - return name; - } - - ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - using BaseWorkload<PermuteQueueDescriptor>::m_Data; - mutable arm_compute::CLPermute m_PermuteFunction; -}; - -} // namespace armnn diff --git a/src/backends/cl/workloads/ClPooling2dWorkload.cpp b/src/backends/cl/workloads/ClPooling2dWorkload.cpp deleted file mode 100644 index 683880439f..0000000000 --- a/src/backends/cl/workloads/ClPooling2dWorkload.cpp +++ /dev/null @@ -1,60 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClPooling2dWorkload.hpp" -#include <cl/ClLayerSupport.hpp> -#include <cl/ClTensorHandle.hpp> -#include <aclCommon/ArmComputeUtils.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const Pooling2dDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - - arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor); - - return arm_compute::CLPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo); -} - -ClPooling2dWorkload::ClPooling2dWorkload( - const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info) - : BaseWorkload<Pooling2dQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClPooling2dWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); - input.info()->set_data_layout(aclDataLayout); - output.info()->set_data_layout(aclDataLayout); - - // flag to use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy - // enable fp_mixed_precision for the the FP16 cases that - // accumulation reaches a limit beyond which there is no more increment of the value - bool fpMixedPrecision = false; - - arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters, fpMixedPrecision); - - // Run the layer. - m_PoolingLayer.configure(&input, &output, layerInfo); -} - -void ClPooling2dWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClPooling2dWorkload_Execute"); - RunClFunction(m_PoolingLayer, CHECK_LOCATION()); -} - -} diff --git a/src/backends/cl/workloads/ClPooling2dWorkload.hpp b/src/backends/cl/workloads/ClPooling2dWorkload.hpp deleted file mode 100644 index ce67db2a13..0000000000 --- a/src/backends/cl/workloads/ClPooling2dWorkload.hpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLPoolingLayer.h> - -namespace armnn -{ - -arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const Pooling2dDescriptor& descriptor); - -class ClPooling2dWorkload : public BaseWorkload<Pooling2dQueueDescriptor> -{ -public: - using BaseWorkload<Pooling2dQueueDescriptor>::m_Data; - - ClPooling2dWorkload(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info); - - void Execute() const override; - -private: - mutable arm_compute::CLPoolingLayer m_PoolingLayer; -}; - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClPreluWorkload.cpp b/src/backends/cl/workloads/ClPreluWorkload.cpp deleted file mode 100644 index 1813105c21..0000000000 --- a/src/backends/cl/workloads/ClPreluWorkload.cpp +++ /dev/null @@ -1,48 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
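// A worked note on the fp_mixed_precision flag above (the intent of the comment: enable
// FP32 accumulation for FP16 cases where the running sum would otherwise stop growing):
// FP16 has an 11-bit significand, so at magnitude 2048 its representable step is 2.0 and
// 2048.0 + 1.0 in FP16 rounds back to 2048.0. An average pool over a large window can
// therefore plateau mid-sum; accumulating in FP32 and converting once at the end avoids
// this, at some performance cost, which is why the flag defaults to false here.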
-// SPDX-License-Identifier: MIT -// - -#include "ClPreluWorkload.hpp" -#include "ClWorkloadUtils.hpp" -#include <backendsCommon/CpuTensorHandle.hpp> -#include <aclCommon/ArmComputeUtils.hpp> -#include <cl/ClLayerSupport.hpp> -#include <cl/ClTensorHandle.hpp> - -namespace armnn -{ - -arm_compute::Status ClPreluWorkloadValidate(const TensorInfo& input, - const TensorInfo& alpha, - const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclAlpha = armcomputetensorutils::BuildArmComputeTensorInfo(alpha); - const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - return arm_compute::CLPReluLayer::validate(&aclInput, - &aclAlpha, - &aclOutput); -} - -ClPreluWorkload::ClPreluWorkload(const PreluQueueDescriptor& descriptor, - const WorkloadInfo& info) - : BaseWorkload<PreluQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClPreluWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& alpha = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_PreluLayer.configure(&input, &alpha, &output); -} - -void ClPreluWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClPreluWorkload_Execute"); - RunClFunction(m_PreluLayer, CHECK_LOCATION()); -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClPreluWorkload.hpp b/src/backends/cl/workloads/ClPreluWorkload.hpp deleted file mode 100644 index 9061416431..0000000000 --- a/src/backends/cl/workloads/ClPreluWorkload.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLPReluLayer.h> - -namespace armnn -{ -arm_compute::Status ClPreluWorkloadValidate(const TensorInfo& input, - const TensorInfo& alpha, - const TensorInfo& output); - -class ClPreluWorkload : public BaseWorkload<PreluQueueDescriptor> -{ -public: - ClPreluWorkload(const PreluQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLPReluLayer m_PreluLayer; -}; - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClQuantizeWorkload.cpp b/src/backends/cl/workloads/ClQuantizeWorkload.cpp deleted file mode 100644 index 263065a5a4..0000000000 --- a/src/backends/cl/workloads/ClQuantizeWorkload.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClQuantizeWorkload.hpp" -#include "ClWorkloadUtils.hpp" - -#include <aclCommon/ArmComputeUtils.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include <backendsCommon/CpuTensorHandle.hpp> - -#include <cl/ClLayerSupport.hpp> -#include <cl/ClTensorHandle.hpp> -#include <cl/ClLayerSupport.hpp> - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status ClQuantizeWorkloadValidate(const TensorInfo& input, - const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - return arm_compute::CLQuantizationLayer::validate(&aclInputInfo, - &aclOutputInfo); -} - -ClQuantizeWorkload::ClQuantizeWorkload(const QuantizeQueueDescriptor& descriptor, const WorkloadInfo& info) - : BaseWorkload<QuantizeQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClQuantizeWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_Layer.configure(&input, &output); -} - -void ClQuantizeWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClQuantizeWorkload_Execute"); - RunClFunction(m_Layer, CHECK_LOCATION()); -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClQuantizeWorkload.hpp b/src/backends/cl/workloads/ClQuantizeWorkload.hpp deleted file mode 100644 index f4a7ec64dd..0000000000 --- a/src/backends/cl/workloads/ClQuantizeWorkload.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLQuantizationLayer.h> - -namespace armnn -{ - -arm_compute::Status ClQuantizeWorkloadValidate(const TensorInfo& input, - const TensorInfo& output); - -class ClQuantizeWorkload : public BaseWorkload<QuantizeQueueDescriptor> -{ -public: - ClQuantizeWorkload(const QuantizeQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLQuantizationLayer m_Layer; -}; - -} //namespace armnn
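// The quantization layer above applies the standard affine mapping from float to an
// 8-bit code. As a worked equation, using the convention real = scale * (q - offset):
//   q = clamp(round(x / scale) + offset, 0, 255)
// e.g. with scale = 0.5 and offset = 10, x = 2.3f gives round(4.6) + 10 = 15.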
\ No newline at end of file diff --git a/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp b/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp deleted file mode 100644 index 688ebf9184..0000000000 --- a/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp +++ /dev/null @@ -1,158 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClQuantizedLstmWorkload.hpp" -#include "ClWorkloadUtils.hpp" - -#include <backendsCommon/CpuTensorHandle.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> -#include <cl/ClTensorHandle.hpp> - -namespace armnn -{ - -using namespace armcomputetensorutils; - -arm_compute::Status ClQuantizedLstmWorkloadValidate(const TensorInfo& input, const TensorInfo& previousCellStateIn, - const TensorInfo& previousOutputIn, const TensorInfo& cellStateOut, - const TensorInfo& output, - const QuantizedLstmInputParamsInfo& paramsInfo) -{ - // Inputs - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclPreviousCellStateInInfo = BuildArmComputeTensorInfo(previousCellStateIn); - const arm_compute::TensorInfo aclPreviousOutputInInfo = BuildArmComputeTensorInfo(previousOutputIn); - - // Outputs - const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - // Basic parameters - const arm_compute::TensorInfo aclInputToInputWeightsInfo - = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights()); - const arm_compute::TensorInfo aclInputToForgetWeightsInfo - = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights()); - const arm_compute::TensorInfo aclInputToCellWeightsInfo - = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights()); - const arm_compute::TensorInfo aclInputToOutputWeightsInfo - = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights()); - const arm_compute::TensorInfo aclRecurrentToInputWeightsInfo - = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights()); - const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo - = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights()); - const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo - = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights()); - const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo - = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights()); - const arm_compute::TensorInfo aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias()); - const arm_compute::TensorInfo aclForgetGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias()); - const arm_compute::TensorInfo aclCellBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellBias()); - const arm_compute::TensorInfo aclOutputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias()); - - return arm_compute::CLLSTMLayerQuantized::validate(&aclInputInfo, &aclInputToInputWeightsInfo, - &aclInputToForgetWeightsInfo, &aclInputToCellWeightsInfo, - &aclInputToOutputWeightsInfo, &aclRecurrentToInputWeightsInfo, - &aclRecurrentToForgetWeightsInfo, &aclRecurrentToCellWeightsInfo, - &aclRecurrentToOutputWeightsInfo, &aclInputGateBiasInfo, - &aclForgetGateBiasInfo, &aclCellBiasInfo, &aclOutputGateBiasInfo, - &aclPreviousCellStateInInfo, &aclPreviousOutputInInfo, - &aclCellStateOutInfo, &aclOutputInfo); -} - -ClQuantizedLstmWorkload::ClQuantizedLstmWorkload(const QuantizedLstmQueueDescriptor 
&descriptor, - const WorkloadInfo &info): - BaseWorkload<QuantizedLstmQueueDescriptor>(descriptor, info) -{ - m_InputToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_InputToInputWeightsTensor, m_Data.m_InputToInputWeights->GetTensorInfo()); - - m_InputToForgetWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights->GetTensorInfo()); - - m_InputToCellWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_InputToCellWeightsTensor, m_Data.m_InputToCellWeights->GetTensorInfo()); - - m_InputToOutputWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_InputToOutputWeightsTensor, m_Data.m_InputToOutputWeights->GetTensorInfo()); - - m_RecurrentToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_RecurrentToInputWeightsTensor, m_Data.m_RecurrentToInputWeights->GetTensorInfo()); - - m_RecurrentToForgetWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_RecurrentToForgetWeightsTensor, m_Data.m_RecurrentToForgetWeights->GetTensorInfo()); - - m_RecurrentToCellWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_RecurrentToCellWeightsTensor, m_Data.m_RecurrentToCellWeights->GetTensorInfo()); - - m_RecurrentToOutputWeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_RecurrentToOutputWeightsTensor, m_Data.m_RecurrentToOutputWeights->GetTensorInfo()); - - m_InputGateBiasTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_InputGateBiasTensor, m_Data.m_InputGateBias->GetTensorInfo()); - - m_ForgetGateBiasTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_ForgetGateBiasTensor, m_Data.m_ForgetGateBias->GetTensorInfo()); - - m_CellBiasTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_CellBiasTensor, m_Data.m_CellBias->GetTensorInfo()); - - m_OutputGateBiasTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_OutputGateBiasTensor, m_Data.m_OutputGateBias->GetTensorInfo()); - - const arm_compute::ICLTensor& inputTensor = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& cellStateInTensor = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); - const arm_compute::ICLTensor& outputStateInTensor = static_cast<IClTensorHandle*>(m_Data.m_Inputs[2])->GetTensor(); - - arm_compute::ICLTensor& cellStateOutTensor = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - arm_compute::ICLTensor& outputStateOutTensor = static_cast<IClTensorHandle*>(m_Data.m_Outputs[1])->GetTensor(); - - m_QuantizedLstmLayer.configure(&inputTensor, m_InputToInputWeightsTensor.get(), m_InputToForgetWeightsTensor.get(), - m_InputToCellWeightsTensor.get(), m_InputToOutputWeightsTensor.get(), - m_RecurrentToInputWeightsTensor.get(), m_RecurrentToForgetWeightsTensor.get(), - m_RecurrentToCellWeightsTensor.get(), m_RecurrentToOutputWeightsTensor.get(), - m_InputGateBiasTensor.get(), m_ForgetGateBiasTensor.get(), m_CellBiasTensor.get(), - m_OutputGateBiasTensor.get(), &cellStateInTensor, &outputStateInTensor, - &cellStateOutTensor, &outputStateOutTensor); - - InitializeArmComputeClTensorData(*m_InputToInputWeightsTensor, m_Data.m_InputToInputWeights); - InitializeArmComputeClTensorData(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights); - 
InitializeArmComputeClTensorData(*m_InputToCellWeightsTensor, m_Data.m_InputToCellWeights); - InitializeArmComputeClTensorData(*m_InputToOutputWeightsTensor, m_Data.m_InputToOutputWeights); - InitializeArmComputeClTensorData(*m_RecurrentToInputWeightsTensor, m_Data.m_RecurrentToInputWeights); - InitializeArmComputeClTensorData(*m_RecurrentToForgetWeightsTensor, m_Data.m_RecurrentToForgetWeights); - InitializeArmComputeClTensorData(*m_RecurrentToCellWeightsTensor, m_Data.m_RecurrentToCellWeights); - InitializeArmComputeClTensorData(*m_RecurrentToOutputWeightsTensor, m_Data.m_RecurrentToOutputWeights); - InitializeArmComputeClTensorData(*m_InputGateBiasTensor, m_Data.m_InputGateBias); - InitializeArmComputeClTensorData(*m_ForgetGateBiasTensor, m_Data.m_ForgetGateBias); - InitializeArmComputeClTensorData(*m_CellBiasTensor, m_Data.m_CellBias); - InitializeArmComputeClTensorData(*m_OutputGateBiasTensor, m_Data.m_OutputGateBias); - - m_QuantizedLstmLayer.prepare(); - FreeUnusedTensors(); -} - -void ClQuantizedLstmWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClQuantizedLstmWorkload_Execute"); - RunClFunction(m_QuantizedLstmLayer, CHECK_LOCATION()); -} - -void ClQuantizedLstmWorkload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_InputToInputWeightsTensor); - FreeTensorIfUnused(m_InputToForgetWeightsTensor); - FreeTensorIfUnused(m_InputToCellWeightsTensor); - FreeTensorIfUnused(m_InputToOutputWeightsTensor); - FreeTensorIfUnused(m_RecurrentToInputWeightsTensor); - FreeTensorIfUnused(m_RecurrentToForgetWeightsTensor); - FreeTensorIfUnused(m_RecurrentToCellWeightsTensor); - FreeTensorIfUnused(m_RecurrentToOutputWeightsTensor); - FreeTensorIfUnused(m_InputGateBiasTensor); - FreeTensorIfUnused(m_ForgetGateBiasTensor); - FreeTensorIfUnused(m_CellBiasTensor); - FreeTensorIfUnused(m_OutputGateBiasTensor); -} - -} // namespace armnn
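// The constructor above follows the constant-tensor lifecycle shared by every CL workload
// that owns weights: (1) BuildArmComputeTensor shapes a staging CLTensor from the armnn
// info, (2) configure() wires the staging tensors into the ACL function, (3)
// InitializeArmComputeClTensorData uploads the host-side constants, (4) prepare() lets
// ACL fold them into its own storage, and (5) FreeUnusedTensors() releases staging memory
// ACL no longer references. Hedged pseudocode of the per-tensor sequence:
//
//   BuildArmComputeTensor(*staging, constHandle->GetTensorInfo());
//   layer.configure(..., staging.get(), ...);
//   InitializeArmComputeClTensorData(*staging, constHandle);
//   layer.prepare();
//   FreeTensorIfUnused(staging); // reclaim GPU memory before the first Execute()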
\ No newline at end of file diff --git a/src/backends/cl/workloads/ClQuantizedLstmWorkload.hpp b/src/backends/cl/workloads/ClQuantizedLstmWorkload.hpp deleted file mode 100644 index 580db490d6..0000000000 --- a/src/backends/cl/workloads/ClQuantizedLstmWorkload.hpp +++ /dev/null @@ -1,49 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <armnn/QuantizedLstmParams.hpp> -#include <backendsCommon/Workload.hpp> -#include <backendsCommon/WorkloadData.hpp> - -#include <arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h> - -namespace armnn -{ - -arm_compute::Status ClQuantizedLstmWorkloadValidate(const TensorInfo& input, const TensorInfo& previousCellStateIn, - const TensorInfo& previousOutputIn, const TensorInfo& cellStateOut, - const TensorInfo& output, - const QuantizedLstmInputParamsInfo& paramsInfo); - -class ClQuantizedLstmWorkload : public BaseWorkload<QuantizedLstmQueueDescriptor> -{ -public: - ClQuantizedLstmWorkload(const QuantizedLstmQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLLSTMLayerQuantized m_QuantizedLstmLayer; - - std::unique_ptr<arm_compute::CLTensor> m_InputToInputWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_InputToForgetWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_InputToCellWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_InputToOutputWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_RecurrentToInputWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_RecurrentToForgetWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_RecurrentToCellWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_RecurrentToOutputWeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_InputGateBiasTensor; - std::unique_ptr<arm_compute::CLTensor> m_ForgetGateBiasTensor; - std::unique_ptr<arm_compute::CLTensor> m_CellBiasTensor; - std::unique_ptr<arm_compute::CLTensor> m_OutputGateBiasTensor; - - void FreeUnusedTensors(); -}; - -} //namespace armnn - - diff --git a/src/backends/cl/workloads/ClReshapeWorkload.cpp b/src/backends/cl/workloads/ClReshapeWorkload.cpp deleted file mode 100644 index d752290444..0000000000 --- a/src/backends/cl/workloads/ClReshapeWorkload.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClReshapeWorkload.hpp" -#include <cl/ClTensorHandle.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -arm_compute::Status ClReshapeWorkloadValidate(const TensorInfo& input, - const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - return arm_compute::CLReshapeLayer::validate(&aclInputInfo, &aclOutputInfo); -} - -ClReshapeWorkload::ClReshapeWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) - : BaseWorkload<ReshapeQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClReshapeWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_Layer.configure(&input, &output); -} - -void ClReshapeWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClReshapeWorkload_Execute"); - RunClFunction(m_Layer, CHECK_LOCATION()); -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClReshapeWorkload.hpp b/src/backends/cl/workloads/ClReshapeWorkload.hpp deleted file mode 100644 index d836f1e43a..0000000000 --- a/src/backends/cl/workloads/ClReshapeWorkload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLReshapeLayer.h> - -namespace armnn -{ - -arm_compute::Status ClReshapeWorkloadValidate(const TensorInfo& input, - const TensorInfo& output); - -class ClReshapeWorkload : public BaseWorkload<ReshapeQueueDescriptor> -{ -public: - ClReshapeWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - mutable arm_compute::CLReshapeLayer m_Layer; -}; - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClResizeWorkload.cpp b/src/backends/cl/workloads/ClResizeWorkload.cpp deleted file mode 100644 index 05b212c3d4..0000000000 --- a/src/backends/cl/workloads/ClResizeWorkload.cpp +++ /dev/null @@ -1,77 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClResizeWorkload.hpp" - -#include "ClWorkloadUtils.hpp" - -#include <aclCommon/ArmComputeUtils.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include <backendsCommon/CpuTensorHandle.hpp> - -#include <cl/ClTensorHandle.hpp> - -using namespace armnn::armcomputetensorutils; - -namespace armnn -{ - -arm_compute::Status ClResizeWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const ResizeDescriptor& descriptor) -{ - arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - arm_compute::DataLayout aclDataLayout = ConvertDataLayout(descriptor.m_DataLayout); - aclInputInfo.set_data_layout(aclDataLayout); - aclOutputInfo.set_data_layout(aclDataLayout); - - arm_compute::InterpolationPolicy aclInterpolationPolicy = - ConvertResizeMethodToAclInterpolationPolicy(descriptor.m_Method); - - return arm_compute::CLScale::validate(&aclInputInfo, - &aclOutputInfo, - aclInterpolationPolicy, - arm_compute::BorderMode::REPLICATE, - arm_compute::PixelValue(0.f), - arm_compute::SamplingPolicy::TOP_LEFT, - true, - descriptor.m_BilinearAlignCorners); -} - -ClResizeWorkload::ClResizeWorkload(const ResizeQueueDescriptor& descriptor, const WorkloadInfo& info) : - BaseWorkload<ResizeQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClResizeWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); - input.info()->set_data_layout(aclDataLayout); - output.info()->set_data_layout(aclDataLayout); - - arm_compute::InterpolationPolicy aclInterpolationPolicy = - ConvertResizeMethodToAclInterpolationPolicy(descriptor.m_Parameters.m_Method); - - m_ResizeLayer.configure(&input, - &output, - aclInterpolationPolicy, - arm_compute::BorderMode::REPLICATE, - arm_compute::PixelValue(0.f), - arm_compute::SamplingPolicy::TOP_LEFT, - true, - descriptor.m_Parameters.m_BilinearAlignCorners); -}; - -void ClResizeWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClResizeWorkload_Execute"); - RunClFunction(m_ResizeLayer, CHECK_LOCATION()); -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClResizeWorkload.hpp b/src/backends/cl/workloads/ClResizeWorkload.hpp deleted file mode 100644 index ab5b943457..0000000000 --- a/src/backends/cl/workloads/ClResizeWorkload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
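// A worked note on m_BilinearAlignCorners above: the flag changes the coordinate mapping
// of bilinear resizing. Roughly, for width W_in scaled to W_out with TOP_LEFT sampling:
//   align corners off: x_in = x_out * W_in / W_out
//   align corners on : x_in = x_out * (W_in - 1) / (W_out - 1)
// e.g. W_in = 4, W_out = 8, x_out = 7: off -> 3.5, on -> 3.0, so with align-corners the
// last output sample coincides exactly with the last input sample.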
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLScale.h> - -namespace armnn -{ - -arm_compute::Status ClResizeWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const ResizeDescriptor& descriptor); - -class ClResizeWorkload : public BaseWorkload<ResizeQueueDescriptor> -{ -public: - ClResizeWorkload(const ResizeQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLScale m_ResizeLayer; -}; - -} // namespace armnn diff --git a/src/backends/cl/workloads/ClRsqrtWorkload.cpp b/src/backends/cl/workloads/ClRsqrtWorkload.cpp deleted file mode 100644 index be687595f7..0000000000 --- a/src/backends/cl/workloads/ClRsqrtWorkload.cpp +++ /dev/null @@ -1,44 +0,0 @@ -// -// Copyright © 2019 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClRsqrtWorkload.hpp" - -#include "ClWorkloadUtils.hpp" - -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include <cl/ClTensorHandle.hpp> - -#include <boost/cast.hpp> - -namespace armnn -{ - -arm_compute::Status ClRsqrtWorkloadValidate(const TensorInfo& input, const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - return arm_compute::CLRsqrtLayer::validate(&aclInput, &aclOutput); -} - -ClRsqrtWorkload::ClRsqrtWorkload(const RsqrtQueueDescriptor& descriptor, const WorkloadInfo& info) - : BaseWorkload<RsqrtQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClRsqrtWorkload", 1, 1); - - arm_compute::ICLTensor& input = boost::polymorphic_downcast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = boost::polymorphic_downcast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - m_RsqrtLayer.configure(&input, &output); -} - -void ClRsqrtWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClRsqrtWorkload_Execute"); - RunClFunction(m_RsqrtLayer, CHECK_LOCATION()); -} - -} // namespace armnn diff --git a/src/backends/cl/workloads/ClRsqrtWorkload.hpp b/src/backends/cl/workloads/ClRsqrtWorkload.hpp deleted file mode 100644 index 8fb6229160..0000000000 --- a/src/backends/cl/workloads/ClRsqrtWorkload.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// -// Copyright © 2019 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/core/Error.h> -#include <arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h> - -namespace armnn -{ - -arm_compute::Status ClRsqrtWorkloadValidate(const TensorInfo& input, const TensorInfo& output); - -class ClRsqrtWorkload : public BaseWorkload<RsqrtQueueDescriptor> -{ -public: - ClRsqrtWorkload(const RsqrtQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable arm_compute::CLRsqrtLayer m_RsqrtLayer; -}; - -} // namespace armnn diff --git a/src/backends/cl/workloads/ClSliceWorkload.cpp b/src/backends/cl/workloads/ClSliceWorkload.cpp deleted file mode 100644 index fa99e7f54d..0000000000 --- a/src/backends/cl/workloads/ClSliceWorkload.cpp +++ /dev/null @@ -1,56 +0,0 @@ -// -// Copyright © 2019 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClSliceWorkload.hpp" - -#include "ClWorkloadUtils.hpp" - -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include <cl/ClTensorHandle.hpp> - -#include <boost/cast.hpp> - -namespace armnn -{ - -arm_compute::Status ClSliceWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const SliceDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - arm_compute::Coordinates starts; - arm_compute::Coordinates ends; - - std::tie(starts, ends) = SetClSliceData(descriptor.m_Begin, descriptor.m_Size); - - return arm_compute::CLSlice::validate(&aclInput, &aclOutput, starts, ends); -} - -ClSliceWorkload::ClSliceWorkload(const SliceQueueDescriptor& descriptor, const WorkloadInfo& info) - : BaseWorkload<SliceQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClSliceWorkload", 1, 1); - - arm_compute::ICLTensor& input = boost::polymorphic_downcast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = boost::polymorphic_downcast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - arm_compute::Coordinates starts; - arm_compute::Coordinates ends; - - std::tie(starts, ends) = SetClSliceData(m_Data.m_Parameters.m_Begin, m_Data.m_Parameters.m_Size); - - m_SliceFunction.configure(&input, &output, starts, ends); -} - -void ClSliceWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClSliceWorkload_Execute"); - RunClFunction(m_SliceFunction, CHECK_LOCATION()); -} - -} // namespace armnn diff --git a/src/backends/cl/workloads/ClSliceWorkload.hpp b/src/backends/cl/workloads/ClSliceWorkload.hpp deleted file mode 100644 index 3460b7788b..0000000000 --- a/src/backends/cl/workloads/ClSliceWorkload.hpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// Copyright © 2019 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/core/Error.h> -#include <arm_compute/runtime/CL/functions/CLSlice.h> - -namespace armnn -{ - -arm_compute::Status ClSliceWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const SliceDescriptor& descriptor); - -class ClSliceWorkload : public BaseWorkload<SliceQueueDescriptor> -{ -public: - ClSliceWorkload(const SliceQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable arm_compute::CLSlice m_SliceFunction; -}; - -} // namespace armnn diff --git a/src/backends/cl/workloads/ClSoftmaxBaseWorkload.cpp b/src/backends/cl/workloads/ClSoftmaxBaseWorkload.cpp deleted file mode 100644 index a355ba0c2d..0000000000 --- a/src/backends/cl/workloads/ClSoftmaxBaseWorkload.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
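// SetClSliceData, used above, converts armnn's begin/size pairs into the inclusive-start,
// exclusive-end coordinates CLSlice expects, mirroring axis order as elsewhere in this
// backend. A sketch under the assumption that it matches the helper in
// ClWorkloadUtils.hpp:

#include <arm_compute/core/Coordinates.h>
#include <tuple>
#include <vector>

std::tuple<arm_compute::Coordinates, arm_compute::Coordinates>
SetClSliceDataSketch(const std::vector<unsigned int>& begin, const std::vector<unsigned int>& size)
{
    arm_compute::Coordinates starts;
    arm_compute::Coordinates ends;
    const unsigned int numDims = static_cast<unsigned int>(begin.size());
    for (unsigned int i = 0; i < numDims; ++i)
    {
        const unsigned int mirrored = numDims - i - 1; // armnn outermost-first -> ACL innermost-first
        starts.set(i, static_cast<int>(begin[mirrored]));
        ends.set(i, static_cast<int>(begin[mirrored] + size[mirrored])); // end is exclusive
    }
    return std::make_tuple(starts, ends);
}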
-// SPDX-License-Identifier: MIT -// - -#include "ClSoftmaxBaseWorkload.hpp" - -#include <aclCommon/ArmComputeTensorUtils.hpp> -#include <aclCommon/ArmComputeUtils.hpp> - -#include <arm_compute/runtime/CL/functions/CLSoftmaxLayer.h> - -namespace armnn -{ - -arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const SoftmaxDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - unsigned int aclAxis = ComputeSoftmaxAclAxis(descriptor, input); - return arm_compute::CLSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta, aclAxis); -} - -} diff --git a/src/backends/cl/workloads/ClSoftmaxBaseWorkload.hpp b/src/backends/cl/workloads/ClSoftmaxBaseWorkload.hpp deleted file mode 100644 index 8d73060162..0000000000 --- a/src/backends/cl/workloads/ClSoftmaxBaseWorkload.hpp +++ /dev/null @@ -1,19 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <armnn/Descriptors.hpp> -#include <armnn/Tensor.hpp> -#include <arm_compute/core/Error.h> - -namespace armnn -{ - -arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const SoftmaxDescriptor& descriptor); - -} // namespace armnn diff --git a/src/backends/cl/workloads/ClSoftmaxFloatWorkload.cpp b/src/backends/cl/workloads/ClSoftmaxFloatWorkload.cpp deleted file mode 100644 index adb4872b80..0000000000 --- a/src/backends/cl/workloads/ClSoftmaxFloatWorkload.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClSoftmaxFloatWorkload.hpp" -#include "ClWorkloadUtils.hpp" - -#include <aclCommon/ArmComputeUtils.hpp> -#include <cl/ClTensorHandle.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> - -namespace armnn -{ - -ClSoftmaxFloatWorkload::ClSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) - : FloatWorkload<SoftmaxQueueDescriptor>(descriptor, info) - , m_SoftmaxLayer(memoryManager) -{ - m_Data.ValidateInputsOutputs("ClSoftmaxFloatWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - unsigned int aclAxis = ComputeSoftmaxAclAxis(m_Data.m_Parameters, info.m_InputTensorInfos[0]); - m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta, aclAxis); -} - -void ClSoftmaxFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClSoftmaxFloatWorkload_Execute"); - RunClFunction(m_SoftmaxLayer, CHECK_LOCATION()); -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClSoftmaxFloatWorkload.hpp b/src/backends/cl/workloads/ClSoftmaxFloatWorkload.hpp deleted file mode 100644 index 7efdae858a..0000000000 --- a/src/backends/cl/workloads/ClSoftmaxFloatWorkload.hpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
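// ComputeSoftmaxAclAxis, shared by the softmax workloads here, resolves the usual axis
// mismatch: armnn counts the softmax axis from the outermost dimension, ACL from the
// innermost. A simplified sketch assuming a non-negative armnn axis; the helper of this
// era may simply have pinned the axis to the innermost dimension, so treat this as the
// general idea rather than the exact function:

unsigned int ComputeSoftmaxAclAxisSketch(unsigned int armnnAxis, unsigned int rank)
{
    return rank - 1 - armnnAxis; // mirror the axis index between the two conventions
}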
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLSoftmaxLayer.h> -#include <arm_compute/runtime/MemoryManagerOnDemand.h> - -#include <memory> - -namespace armnn -{ - -class ClSoftmaxFloatWorkload : public FloatWorkload<SoftmaxQueueDescriptor> -{ -public: - ClSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); - void Execute() const override; - -private: - mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer; -}; - -} //namespace armnn - diff --git a/src/backends/cl/workloads/ClSoftmaxUint8Workload.cpp b/src/backends/cl/workloads/ClSoftmaxUint8Workload.cpp deleted file mode 100644 index f14ea11c82..0000000000 --- a/src/backends/cl/workloads/ClSoftmaxUint8Workload.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClSoftmaxUint8Workload.hpp" -#include "ClWorkloadUtils.hpp" - -#include <aclCommon/ArmComputeUtils.hpp> -#include <cl/ClTensorHandle.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> - -namespace armnn -{ - -ClSoftmaxUint8Workload::ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) - : Uint8Workload<SoftmaxQueueDescriptor>(descriptor, info) - , m_SoftmaxLayer(memoryManager) -{ - m_Data.ValidateInputsOutputs("ClSoftmaxUint8Workload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - const auto outputQuantization = output.info()->quantization_info(); - - if ((!outputQuantization.scale().empty() && outputQuantization.scale()[0] != (1.0f / 256.0f)) || - (!outputQuantization.offset().empty() && outputQuantization.offset()[0] != 0) || - outputQuantization.scale().empty() || outputQuantization.offset().empty()) - { - throw InvalidArgumentException( - "Invalid quantization for output. Only scale = 1.0f / 256.0f and offset = 0 supported"); - } - - unsigned int aclAxis = ComputeSoftmaxAclAxis(m_Data.m_Parameters, info.m_InputTensorInfos[0]); - m_SoftmaxLayer.configure(&input, &output, descriptor.m_Parameters.m_Beta, aclAxis); -} - -void ClSoftmaxUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClSoftmaxUint8Workload_Execute"); - RunClFunction(m_SoftmaxLayer, CHECK_LOCATION()); -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClSoftmaxUint8Workload.hpp b/src/backends/cl/workloads/ClSoftmaxUint8Workload.hpp deleted file mode 100644 index f378b89a5d..0000000000 --- a/src/backends/cl/workloads/ClSoftmaxUint8Workload.hpp +++ /dev/null @@ -1,31 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLSoftmaxLayer.h> -#include <arm_compute/runtime/MemoryManagerOnDemand.h> - -#include <memory> - -namespace armnn -{ -// Softmax -class ClSoftmaxUint8Workload : public Uint8Workload<SoftmaxQueueDescriptor> -{ -public: - ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); - - void Execute() const override; -private: - - mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer; -}; - -} //namespace armnn - diff --git a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp deleted file mode 100644 index 64da92c815..0000000000 --- a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp +++ /dev/null @@ -1,85 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClSpaceToBatchNdWorkload.hpp" - -#include "ClWorkloadUtils.hpp" - -#include <aclCommon/ArmComputeUtils.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> -#include <cl/ClLayerSupport.hpp> -#include <cl/ClTensorHandle.hpp> -#include <cl/ClLayerSupport.hpp> - -#include <boost/polymorphic_pointer_cast.hpp> - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status ClSpaceToBatchNdWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const SpaceToBatchNdDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - - // ArmNN blockShape is [H, W] Cl asks for W, H - int32_t blockHeight = boost::numeric_cast<int32_t>(descriptor.m_BlockShape[0]); - int32_t blockWidth = boost::numeric_cast<int32_t>(descriptor.m_BlockShape[1]); - - arm_compute::Size2D paddingLeftTop = BuildArmComputeSize2D( - descriptor.m_PadList[1].first, descriptor.m_PadList[0].first); - arm_compute::Size2D paddingRightBottom = BuildArmComputeSize2D( - descriptor.m_PadList[1].second, descriptor.m_PadList[0].second); - - return arm_compute::CLSpaceToBatchLayer::validate(&aclInputInfo, - blockWidth, - blockHeight, - paddingLeftTop, - paddingRightBottom, - &aclOutputInfo); -} - -ClSpaceToBatchNdWorkload::ClSpaceToBatchNdWorkload( - const SpaceToBatchNdQueueDescriptor& descriptor, const WorkloadInfo& info) - : BaseWorkload<SpaceToBatchNdQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClSpaceToBatchNdWorkload", 1, 1); - - arm_compute::ICLTensor& input = - boost::polymorphic_pointer_downcast<IClTensorHandle>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = - boost::polymorphic_pointer_downcast<IClTensorHandle>(m_Data.m_Outputs[0])->GetTensor(); - - // ArmNN blockShape is [H, W] Cl asks for W, H - int32_t blockHeight = boost::numeric_cast<int32_t>(m_Data.m_Parameters.m_BlockShape[0]); - int32_t blockWidth = boost::numeric_cast<int32_t>(m_Data.m_Parameters.m_BlockShape[1]); - - arm_compute::Size2D paddingLeftTop = BuildArmComputeSize2D( - m_Data.m_Parameters.m_PadList[1].first, m_Data.m_Parameters.m_PadList[0].first); - arm_compute::Size2D paddingRightBottom = BuildArmComputeSize2D( - m_Data.m_Parameters.m_PadList[1].second, m_Data.m_Parameters.m_PadList[0].second); - - arm_compute::DataLayout aclDataLayout = 
ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); - input.info()->set_data_layout(aclDataLayout); - output.info()->set_data_layout(aclDataLayout); - - m_SpaceToBatchLayer.configure(&input, - blockWidth, - blockHeight, - paddingLeftTop, - paddingRightBottom, - &output); -} - -void ClSpaceToBatchNdWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClSpaceToBatchNdWorkload_Execute"); - RunClFunction(m_SpaceToBatchLayer, CHECK_LOCATION()); -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.hpp b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.hpp deleted file mode 100644 index 7500b5a3ac..0000000000 --- a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.hpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <armnn/Tensor.hpp> -#include <armnn/Descriptors.hpp> - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h> - -namespace armnn -{ - -arm_compute::Status ClSpaceToBatchNdWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const SpaceToBatchNdDescriptor& descriptor); - -class ClSpaceToBatchNdWorkload : public BaseWorkload<SpaceToBatchNdQueueDescriptor> -{ -public: - ClSpaceToBatchNdWorkload(const SpaceToBatchNdQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLSpaceToBatchLayer m_SpaceToBatchLayer; -}; - -} //namespace armnn - diff --git a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp b/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp deleted file mode 100644 index d541e4ec52..0000000000 --- a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp +++ /dev/null @@ -1,61 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClSpaceToDepthWorkload.hpp" -#include "ClWorkloadUtils.hpp" - -#include <aclCommon/ArmComputeUtils.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> -#include <cl/ClTensorHandle.hpp> -#include <boost/polymorphic_pointer_cast.hpp> - -namespace armnn -{ -using namespace armcomputetensorutils; - -ClSpaceToDepthWorkload::ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& desc, - const WorkloadInfo& info) - : BaseWorkload<SpaceToDepthQueueDescriptor>(desc, info) -{ - m_Data.ValidateInputsOutputs("ClSpaceToDepthWorkload", 1, 1); - - arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - input.info()->set_data_layout(aclDataLayout); - - int32_t blockSize = boost::numeric_cast<int32_t>(desc.m_Parameters.m_BlockSize); - - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - output.info()->set_data_layout(aclDataLayout); - - m_Layer.configure(&input, &output, blockSize); -} - -void ClSpaceToDepthWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClSpaceToDepthWorkload_Execute"); - RunClFunction(m_Layer, CHECK_LOCATION()); -} - -arm_compute::Status ClSpaceToDepthWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const SpaceToDepthDescriptor& desc) -{ - DataLayout dataLayout = desc.m_DataLayout; - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout); - - int32_t blockSize = boost::numeric_cast<int32_t>(desc.m_BlockSize); - - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout); - - const arm_compute::Status aclStatus = arm_compute::CLSpaceToDepthLayer::validate(&aclInputInfo, - &aclOutputInfo, - blockSize); - return aclStatus; -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp b/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp deleted file mode 100644 index 57ce5d4d05..0000000000 --- a/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <armnn/Descriptors.hpp> - -#include <backendsCommon/Workload.hpp> -#include <arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h> - -namespace armnn -{ -arm_compute::Status ClSpaceToDepthWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const SpaceToDepthDescriptor& desc); - -class ClSpaceToDepthWorkload : public BaseWorkload<SpaceToDepthQueueDescriptor> -{ -public: - ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLSpaceToDepthLayer m_Layer; -}; - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClSplitterWorkload.cpp b/src/backends/cl/workloads/ClSplitterWorkload.cpp deleted file mode 100644 index 296e0a3dde..0000000000 --- a/src/backends/cl/workloads/ClSplitterWorkload.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClSplitterWorkload.hpp" - -#include "ClWorkloadUtils.hpp" - -#include <aclCommon/ArmComputeTensorUtils.hpp> -#include <aclCommon/ArmComputeUtils.hpp> -#include <arm_compute/runtime/CL/functions/CLSplit.h> -#include <backendsCommon/CpuTensorHandle.hpp> -#include <cl/ClTensorHandle.hpp> - - -namespace armnn -{ - -using namespace armcomputetensorutils; - -namespace -{ - unsigned int CalcAclAxis(unsigned int numDimensions, unsigned int splitAxis) - { - return (numDimensions - splitAxis) - 1; - } - -} //namespace - -arm_compute::Status ClSplitterWorkloadValidate(const TensorInfo& input, - const std::vector<std::reference_wrapper<TensorInfo>>& outputs, - unsigned int splitAxis) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - - size_t numOutputs = outputs.size(); - - std::vector<arm_compute::TensorInfo> aclOutputs; - aclOutputs.reserve(numOutputs); - - std::vector<arm_compute::ITensorInfo*> aclOutputPtr; - aclOutputPtr.reserve(numOutputs); - - for (size_t i = 0u; i < outputs.size(); ++i) - { - aclOutputs.emplace_back(BuildArmComputeTensorInfo(outputs[i])); - aclOutputPtr.emplace_back(&aclOutputs.back()); - } - - unsigned int aclAxis = CalcAclAxis(input.GetNumDimensions(), splitAxis); - return arm_compute::CLSplit::validate(&aclInputInfo, aclOutputPtr, aclAxis); -} - -ClSplitterWorkload::ClSplitterWorkload(const SplitterQueueDescriptor& descriptor, const WorkloadInfo& info) - : BaseWorkload<SplitterQueueDescriptor>(descriptor, info) -{ - bool allOutputsAreSubtensors = true; - - // Check that all outputs are sub-tensors - for (auto output : m_Data.m_Outputs) - { - if (output && !output->GetParent()) - { - // Non sub-tensor input found so we need to execute the split function - allOutputsAreSubtensors = false; - break; - } - } - - if (allOutputsAreSubtensors) - { - // Can skip configuring the split function since it's not executed - return; - } - - arm_compute::ICLTensor& input = boost::polymorphic_pointer_downcast<IClTensorHandle>( - m_Data.m_Inputs[0])->GetTensor(); - - std::vector<arm_compute::ICLTensor *> aclOutputs; - for (auto output : m_Data.m_Outputs) - { - arm_compute::ICLTensor& aclOutput = boost::polymorphic_pointer_downcast<IClTensorHandle>(output)->GetTensor(); - aclOutputs.emplace_back(&aclOutput); - } - - // Create the layer function - - // Configure input and output tensors - std::set<unsigned int> splitAxis = ComputeSplitAxis(descriptor.m_Parameters, m_Data.m_Inputs[0]->GetShape()); - if (splitAxis.size() != 1) - { - throw InvalidArgumentException("Cannot derive split axis from SplitterDescriptor"); - } - - unsigned int aclAxis = CalcAclAxis(descriptor.m_Parameters.GetNumDimensions(), *splitAxis.begin()); - auto layer = std::make_unique<arm_compute::CLSplit>(); - layer->configure(&input, aclOutputs, aclAxis); - - // Prepare - layer->prepare(); - - m_Layer = std::move(layer); -} - -void ClSplitterWorkload::Execute() const -{ - if (m_Layer) - { - ARMNN_SCOPED_PROFILING_EVENT_CL("ClSplitterWorkload_Execute"); - m_Layer->run(); - } -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClSplitterWorkload.hpp b/src/backends/cl/workloads/ClSplitterWorkload.hpp deleted file mode 100644 index 82211f5baf..0000000000 --- a/src/backends/cl/workloads/ClSplitterWorkload.hpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/core/Error.h> -#include <arm_compute/runtime/IFunction.h> - -#include <functional> - -namespace armnn -{ - -arm_compute::Status ClSplitterWorkloadValidate(const TensorInfo& input, - const std::vector<std::reference_wrapper<TensorInfo>>& outputs, - unsigned int splitAxis); - -class ClSplitterWorkload : public BaseWorkload<SplitterQueueDescriptor> -{ -public: - ClSplitterWorkload(const SplitterQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - std::unique_ptr<arm_compute::IFunction> m_Layer; -}; - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClStackWorkload.cpp b/src/backends/cl/workloads/ClStackWorkload.cpp deleted file mode 100644 index 3ba698ec4d..0000000000 --- a/src/backends/cl/workloads/ClStackWorkload.cpp +++ /dev/null @@ -1,74 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#include "ClStackWorkload.hpp" -#include "ClWorkloadUtils.hpp" -#include <aclCommon/ArmComputeTensorUtils.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> -#include <cl/ClTensorHandle.hpp> -#include <cl/ClLayerSupport.hpp> - -#include <arm_compute/core/Types.h> - -#include <boost/numeric/conversion/cast.hpp> -#include <boost/polymorphic_pointer_cast.hpp> - -namespace armnn -{ -using namespace armcomputetensorutils; - -namespace -{ -int CalcAxis(const unsigned int axis, const unsigned int inputDimensions) -{ - const int intAxis = boost::numeric_cast<int>(axis); - return boost::numeric_cast<int>(inputDimensions) - intAxis; -} -} //namespace - -arm_compute::Status ClStackWorkloadValidate(const std::vector<const TensorInfo*>& inputs, - const TensorInfo& output, - const StackDescriptor& descriptor) -{ - std::vector<arm_compute::ITensorInfo*> aclInputPtrs; - arm_compute::TensorInfo aclInputInfo; - for (const TensorInfo* input : inputs) - { - aclInputInfo = BuildArmComputeTensorInfo(*input); - aclInputPtrs.emplace_back(&aclInputInfo); - } - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - int aclAxis = CalcAxis(descriptor.m_Axis, descriptor.m_InputShape.GetNumDimensions()); - - return arm_compute::CLStackLayer::validate(aclInputPtrs, aclAxis, &aclOutputInfo); -} - -ClStackWorkload::ClStackWorkload(const StackQueueDescriptor& descriptor, const WorkloadInfo& info) -: BaseWorkload<StackQueueDescriptor>(descriptor, info) -{ - std::vector<arm_compute::ICLTensor*> aclInputs; - for (auto input : m_Data.m_Inputs) - { - arm_compute::ICLTensor& aclInput = boost::polymorphic_pointer_downcast<IClTensorHandle>(input)->GetTensor(); - aclInputs.emplace_back(&aclInput); - } - arm_compute::ICLTensor& output = boost::polymorphic_pointer_downcast<IClTensorHandle>( - m_Data.m_Outputs[0])->GetTensor(); - - m_Layer.reset(new arm_compute::CLStackLayer()); - int aclAxis = CalcAxis(descriptor.m_Parameters.m_Axis, descriptor.m_Parameters.m_InputShape.GetNumDimensions()); - m_Layer->configure(aclInputs, aclAxis, &output); -} - -void ClStackWorkload::Execute() const -{ - if (m_Layer) - { - ARMNN_SCOPED_PROFILING_EVENT_CL("ClStackWorkload_Execute"); - m_Layer->run(); - } -} - -} //namespace armnn
\ No newline at end of file diff --git a/src/backends/cl/workloads/ClStackWorkload.hpp b/src/backends/cl/workloads/ClStackWorkload.hpp deleted file mode 100644 index f27d6cdad0..0000000000 --- a/src/backends/cl/workloads/ClStackWorkload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLStackLayer.h> - -namespace armnn -{ -arm_compute::Status ClStackWorkloadValidate(const std::vector<const TensorInfo*>& inputs, - const TensorInfo& output, - const StackDescriptor& descriptor); - -class ClStackWorkload : public BaseWorkload<StackQueueDescriptor> -{ -public: - ClStackWorkload(const StackQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - mutable std::unique_ptr<arm_compute::CLStackLayer> m_Layer; -}; - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClStridedSliceWorkload.cpp b/src/backends/cl/workloads/ClStridedSliceWorkload.cpp deleted file mode 100644 index 6b0a34d90e..0000000000 --- a/src/backends/cl/workloads/ClStridedSliceWorkload.cpp +++ /dev/null @@ -1,96 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClStridedSliceWorkload.hpp" - -#include "ClWorkloadUtils.hpp" - -#include <aclCommon/ArmComputeUtils.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include <backendsCommon/CpuTensorHandle.hpp> -#include <backendsCommon/WorkloadUtils.hpp> - -#include <boost/numeric/conversion/cast.hpp> -#include <cl/ClLayerSupport.hpp> -#include <cl/ClTensorHandle.hpp> -#include <cl/ClLayerSupport.hpp> - -namespace armnn -{ - -using namespace armcomputetensorutils; - -arm_compute::Status ClStridedSliceWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const StridedSliceDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - - arm_compute::Coordinates starts; - arm_compute::Coordinates ends; - arm_compute::Coordinates strides; - - std::tie(starts, ends, strides) = SetClStridedSliceData(descriptor.m_Begin, descriptor.m_End, descriptor.m_Stride); - - auto numDimensions = boost::numeric_cast<int>(input.GetNumDimensions()); - int32_t begin_mask = ConvertMaskToACLFormat(descriptor.m_BeginMask, numDimensions); - int32_t end_mask = ConvertMaskToACLFormat(descriptor.m_EndMask, numDimensions); - int32_t shrink_axis_mask = ConvertMaskToACLFormat(descriptor.m_ShrinkAxisMask, numDimensions); - - return arm_compute::CLStridedSlice::validate(&aclInputInfo, - &aclOutputInfo, - starts, - ends, - strides, - begin_mask, - end_mask, - shrink_axis_mask); -} - -ClStridedSliceWorkload::ClStridedSliceWorkload(const StridedSliceQueueDescriptor& descriptor, - const WorkloadInfo& info) - : BaseWorkload<StridedSliceQueueDescriptor>(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClStridedSliceWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - arm_compute::Coordinates starts; - arm_compute::Coordinates ends; - arm_compute::Coordinates strides; - - std::tie(starts, ends, strides) = SetClStridedSliceData(m_Data.m_Parameters.m_Begin, - 
m_Data.m_Parameters.m_End, - m_Data.m_Parameters.m_Stride); - - auto numDimensions = boost::numeric_cast<int>(info.m_InputTensorInfos[0].GetNumDimensions()); - int32_t begin_mask = ConvertMaskToACLFormat(m_Data.m_Parameters.m_BeginMask, numDimensions); - int32_t end_mask = ConvertMaskToACLFormat(m_Data.m_Parameters.m_EndMask, numDimensions); - int32_t shrink_axis_mask = ConvertMaskToACLFormat(m_Data.m_Parameters.m_ShrinkAxisMask, numDimensions); - - arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); - input.info()->set_data_layout(aclDataLayout); - output.info()->set_data_layout(aclDataLayout); - - m_StridedSliceLayer.configure(&input, - &output, - starts, - ends, - strides, - begin_mask, - end_mask, - shrink_axis_mask); -} - -void ClStridedSliceWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClStridedSliceWorkload_Execute"); - RunClFunction(m_StridedSliceLayer, CHECK_LOCATION()); -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClStridedSliceWorkload.hpp b/src/backends/cl/workloads/ClStridedSliceWorkload.hpp deleted file mode 100644 index bce3fe13eb..0000000000 --- a/src/backends/cl/workloads/ClStridedSliceWorkload.hpp +++ /dev/null @@ -1,32 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <armnn/Tensor.hpp> -#include <armnn/Descriptors.hpp> - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLStridedSlice.h> - -namespace armnn -{ - -arm_compute::Status ClStridedSliceWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const StridedSliceDescriptor& descriptor); - -class ClStridedSliceWorkload : public BaseWorkload<StridedSliceQueueDescriptor> -{ -public: - ClStridedSliceWorkload(const StridedSliceQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLStridedSlice m_StridedSliceLayer; -}; - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.cpp b/src/backends/cl/workloads/ClSubtractionWorkload.cpp deleted file mode 100644 index 38154eb4d7..0000000000 --- a/src/backends/cl/workloads/ClSubtractionWorkload.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClSubtractionWorkload.hpp" - -#include <cl/ClTensorHandle.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; - -ClSubtractionWorkload::ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor, - const WorkloadInfo& info) - : BaseWorkload<SubtractionQueueDescriptor>(descriptor, info) -{ - this->m_Data.ValidateInputsOutputs("ClSubtractionWorkload", 2, 1); - - arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[1])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor(); - m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy); -} - -void ClSubtractionWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClSubtractionWorkload_Execute"); - RunClFunction(m_Layer, CHECK_LOCATION()); -} - -arm_compute::Status ClSubtractionValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0); - const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - const arm_compute::Status aclStatus = arm_compute::CLArithmeticSubtraction::validate(&aclInput0Info, - &aclInput1Info, - &aclOutputInfo, - g_AclConvertPolicy); - - return aclStatus; -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.hpp b/src/backends/cl/workloads/ClSubtractionWorkload.hpp deleted file mode 100644 index da6d17c6ac..0000000000 --- a/src/backends/cl/workloads/ClSubtractionWorkload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLElementwiseOperations.h> - -namespace armnn -{ - -class ClSubtractionWorkload : public BaseWorkload<SubtractionQueueDescriptor> -{ -public: - ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - mutable arm_compute::CLArithmeticSubtraction m_Layer; -}; - -arm_compute::Status ClSubtractionValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output); -} //namespace armnn diff --git a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp deleted file mode 100644 index 7c0736645b..0000000000 --- a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp +++ /dev/null @@ -1,110 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClTransposeConvolution2dWorkload.hpp" - -#include "ClWorkloadUtils.hpp" - -#include <cl/ClLayerSupport.hpp> -#include <cl/ClTensorHandle.hpp> -#include <cl/ClLayerSupport.hpp> - -#include <aclCommon/ArmComputeUtils.hpp> -#include <aclCommon/ArmComputeTensorUtils.hpp> - -#include <backendsCommon/CpuTensorHandle.hpp> - -#include <arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h> - -namespace armnn -{ - -using namespace armcomputetensorutils; - -arm_compute::Status ClTransposeConvolution2dWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const TransposeConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional<TensorInfo>& biases) -{ - arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); - arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); - - arm_compute::TensorInfo aclBiasesInfo; - arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; - - if (descriptor.m_BiasEnabled) - { - BOOST_ASSERT(biases.has_value()); - - aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout); - optionalAclBiasesInfo = &aclBiasesInfo; - } - - arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(descriptor); - - return arm_compute::CLDeconvolutionLayer::validate(&aclInputInfo, - &aclWeightsInfo, - optionalAclBiasesInfo, - &aclOutputInfo, - padStrideInfo); -} - -ClTransposeConvolution2dWorkload::ClTransposeConvolution2dWorkload( - const TransposeConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) : - BaseWorkload<TransposeConvolution2dQueueDescriptor>(descriptor, info), - m_Layer(memoryManager) -{ - const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); - - m_WeightsTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_WeightsTensor, weightInfo, m_Data.m_Parameters.m_DataLayout); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - m_BiasesTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo(), m_Data.m_Parameters.m_DataLayout); - } - - m_Data.ValidateInputsOutputs("ClTransposeConvolution2dWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); - - input.info()->set_data_layout(aclDataLayout); - output.info()->set_data_layout(aclDataLayout); - - arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters); - m_Layer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, padStrideInfo); - - InitializeArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight); - if (m_BiasesTensor) - { - InitializeArmComputeClTensorData(*m_BiasesTensor, m_Data.m_Bias); - } - - m_Layer.prepare(); - - FreeUnusedTensors(); -} - -void ClTransposeConvolution2dWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClTransposeConvolution2dWorkload_Execute"); - RunClFunction(m_Layer, CHECK_LOCATION()); -} - -void ClTransposeConvolution2dWorkload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_WeightsTensor); 
- FreeTensorIfUnused(m_BiasesTensor); -} - -} // namespace armnn diff --git a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.hpp b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.hpp deleted file mode 100644 index b7320bf6ce..0000000000 --- a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.hpp +++ /dev/null @@ -1,46 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <armnn/Tensor.hpp> -#include <armnn/Descriptors.hpp> - -#include <backendsCommon/Workload.hpp> - -#include <arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h> -#include <arm_compute/runtime/MemoryManagerOnDemand.h> - -#include <memory> - -namespace armnn -{ - -arm_compute::Status ClTransposeConvolution2dWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const TransposeConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional<TensorInfo>& biases); - -class ClTransposeConvolution2dWorkload : public BaseWorkload<TransposeConvolution2dQueueDescriptor> -{ -public: - ClTransposeConvolution2dWorkload(const TransposeConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); - - void Execute() const override; - -private: - mutable arm_compute::CLDeconvolutionLayer m_Layer; - - std::unique_ptr<arm_compute::CLTensor> m_WeightsTensor; - std::unique_ptr<arm_compute::CLTensor> m_BiasesTensor; - - void FreeUnusedTensors(); -}; - -} // namespace armnn - diff --git a/src/backends/cl/workloads/ClWorkloadUtils.hpp b/src/backends/cl/workloads/ClWorkloadUtils.hpp deleted file mode 100644 index b4bcc1c017..0000000000 --- a/src/backends/cl/workloads/ClWorkloadUtils.hpp +++ /dev/null @@ -1,143 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#pragma once - -#include <Half.hpp> - -#include <aclCommon/ArmComputeTensorUtils.hpp> -#include <cl/OpenClTimer.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> - -#include <armnn/Utils.hpp> - -#include <arm_compute/runtime/CL/CLTensor.h> -#include <arm_compute/runtime/IFunction.h> - -#include <sstream> - -#define ARMNN_SCOPED_PROFILING_EVENT_CL(name) \ - ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::GpuAcc, \ - name, \ - armnn::OpenClTimer(), \ - armnn::WallClockTimer()) - -namespace armnn -{ - -template <typename T> -void CopyArmComputeClTensorData(arm_compute::CLTensor& dstTensor, const T* srcData) -{ - { - ARMNN_SCOPED_PROFILING_EVENT_CL("MapClTensorForWriting"); - dstTensor.map(true); - } - - { - ARMNN_SCOPED_PROFILING_EVENT_CL("CopyToClTensor"); - armcomputetensorutils::CopyArmComputeITensorData<T>(srcData, dstTensor); - } - - dstTensor.unmap(); -} - -inline auto SetClStridedSliceData(const std::vector<int>& m_begin, - const std::vector<int>& m_end, - const std::vector<int>& m_stride) -{ - arm_compute::Coordinates starts; - arm_compute::Coordinates ends; - arm_compute::Coordinates strides; - - unsigned int num_dims = static_cast<unsigned int>(m_begin.size()); - - for (unsigned int i = 0; i < num_dims; i++) { - unsigned int revertedIndex = num_dims - i - 1; - - starts.set(i, static_cast<int>(m_begin[revertedIndex])); - ends.set(i, static_cast<int>(m_end[revertedIndex])); - strides.set(i, static_cast<int>(m_stride[revertedIndex])); - } - - return std::make_tuple(starts, ends, strides); -} - -inline auto SetClSliceData(const std::vector<unsigned int>& m_begin, - const std::vector<unsigned int>& m_size) -{ - // This function must translate the size vector given to an end vector - // expected by the ACL CLSlice workload - arm_compute::Coordinates starts; - arm_compute::Coordinates ends; - - unsigned int num_dims = static_cast<unsigned int>(m_begin.size()); - - // For strided slices, we have the relationship size = (end - begin) / stride - // For slice, we assume stride to be a vector of all ones, yielding the formula - // size = (end - begin), therefore we know end = size + begin - for (unsigned int i = 0; i < num_dims; i++) - { - unsigned int revertedIndex = num_dims - i - 1; - - starts.set(i, static_cast<int>(m_begin[revertedIndex])); - ends.set(i, static_cast<int>(m_begin[revertedIndex] + m_size[revertedIndex])); - } - - return std::make_tuple(starts, ends); -} - -inline void InitializeArmComputeClTensorData(arm_compute::CLTensor& clTensor, - const ConstCpuTensorHandle* handle) -{ - BOOST_ASSERT(handle); - - armcomputetensorutils::InitialiseArmComputeTensorEmpty(clTensor); - switch(handle->GetTensorInfo().GetDataType()) - { - case DataType::Float16: - CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<armnn::Half>()); - break; - case DataType::Float32: - CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<float>()); - break; - case DataType::QAsymmS8: - case DataType::QAsymmU8: - CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<uint8_t>()); - break; - ARMNN_NO_DEPRECATE_WARN_BEGIN - case DataType::QuantizedSymm8PerAxis: - ARMNN_FALLTHROUGH; - case DataType::QSymmS8: - CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int8_t>()); - break; - ARMNN_NO_DEPRECATE_WARN_END - case DataType::Signed32: - CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int32_t>()); - break; - default: - BOOST_ASSERT_MSG(false, "Unexpected tensor type."); - } -}; - -inline RuntimeException WrapClError(const
cl::Error& clError, const CheckLocation& location) -{ - std::stringstream message; - message << "CL error: " << clError.what() << ". Error code: " << clError.err(); - - return RuntimeException(message.str(), location); -} - -inline void RunClFunction(arm_compute::IFunction& function, const CheckLocation& location) -{ - try - { - function.run(); - } - catch (cl::Error& error) - { - throw WrapClError(error, location); - } -} - -} //namespace armnn diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp deleted file mode 100644 index 014dc3f99e..0000000000 --- a/src/backends/cl/workloads/ClWorkloads.hpp +++ /dev/null @@ -1,51 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once -#include "ClAbsWorkload.hpp" -#include "ClActivationWorkload.hpp" -#include "ClAdditionWorkload.hpp" -#include "ClArgMinMaxWorkload.hpp" -#include "ClConstantWorkload.hpp" -#include "ClBatchNormalizationFloatWorkload.hpp" -#include "ClBatchToSpaceNdWorkload.hpp" -#include "ClConvolution2dWorkload.hpp" -#include "ClDepthToSpaceWorkload.hpp" -#include "ClDepthwiseConvolutionWorkload.hpp" -#include "ClDequantizeWorkload.hpp" -#include "ClDivisionFloatWorkload.hpp" -#include "ClFloorFloatWorkload.hpp" -#include "ClFullyConnectedWorkload.hpp" -#include "ClGreaterWorkload.hpp" -#include "ClInstanceNormalizationWorkload.hpp" -#include "ClL2NormalizationFloatWorkload.hpp" -#include "ClLstmFloatWorkload.hpp" -#include "ClConcatWorkload.hpp" -#include "ClMaximumWorkload.hpp" -#include "ClMeanWorkload.hpp" -#include "ClMinimumWorkload.hpp" -#include "ClMultiplicationWorkload.hpp" -#include "ClNormalizationFloatWorkload.hpp" -#include "ClPermuteWorkload.hpp" -#include "ClPadWorkload.hpp" -#include "ClPooling2dWorkload.hpp" -#include "ClPreluWorkload.hpp" -#include "ClQuantizeWorkload.hpp" -#include "ClQuantizedLstmWorkload.hpp" -#include "ClReshapeWorkload.hpp" -#include "ClResizeWorkload.hpp" -#include "ClRsqrtWorkload.hpp" -#include "ClSliceWorkload.hpp" -#include "ClSoftmaxFloatWorkload.hpp" -#include "ClSoftmaxUint8Workload.hpp" -#include "ClSpaceToBatchNdWorkload.hpp" -#include "ClSpaceToDepthWorkload.hpp" -#include "ClSplitterWorkload.hpp" -#include "ClStackWorkload.hpp" -#include "ClStridedSliceWorkload.hpp" -#include "ClSubtractionWorkload.hpp" -#include "ClConvertFp16ToFp32Workload.hpp" -#include "ClConvertFp32ToFp16Workload.hpp" -#include "ClTransposeConvolution2dWorkload.hpp"
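
Note on the coordinate helpers in the deleted ClWorkloadUtils.hpp above: ArmNN descriptors list dimensions outermost-first, while arm_compute::Coordinates are indexed innermost-first, so SetClSliceData and SetClStridedSliceData reverse each vector before handing it to the CL function, and plain Slice derives its end coordinates as end = begin + size (stride is implicitly 1, per the comments in the removed source). A minimal standalone sketch of that mapping follows; the function name MakeAclSliceCoords and the use of std::vector in place of arm_compute::Coordinates are illustrative only, not part of the removed sources.

    // Standalone illustration of the (begin, size) -> reversed (start, end)
    // translation performed by the deleted SetClSliceData helper.
    #include <cstddef>
    #include <utility>
    #include <vector>

    std::pair<std::vector<int>, std::vector<int>>
    MakeAclSliceCoords(const std::vector<unsigned int>& begin,
                       const std::vector<unsigned int>& size)
    {
        const std::size_t numDims = begin.size();
        std::vector<int> starts(numDims);
        std::vector<int> ends(numDims);

        for (std::size_t i = 0; i < numDims; ++i)
        {
            // ArmNN orders dimensions outermost-first; ACL coordinates run
            // innermost-first, hence the index reversal.
            const std::size_t reverted = numDims - i - 1;

            // Plain Slice has an implicit stride of 1, so end = begin + size.
            starts[i] = static_cast<int>(begin[reverted]);
            ends[i]   = static_cast<int>(begin[reverted] + size[reverted]);
        }
        return {starts, ends};
    }

    // Example: begin = {0, 1, 0, 2} and size = {1, 2, 3, 1} map to
    // starts = {2, 0, 1, 0} and ends = {3, 3, 3, 1}.

The same reversal underlies CalcAclAxis in the deleted splitter workload, where an ArmNN axis a on an n-dimensional tensor becomes ACL axis (n - a) - 1.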